From c67d64365a712830fe140dd36e24e2efd9b8a812 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 20:52:12 -0300 Subject: shader: Remove old shader management --- .../engines/const_buffer_engine_interface.h | 103 --------------------- src/video_core/engines/kepler_compute.cpp | 44 +-------- src/video_core/engines/kepler_compute.h | 20 +--- src/video_core/engines/maxwell_3d.cpp | 38 -------- src/video_core/engines/maxwell_3d.h | 20 +--- 5 files changed, 3 insertions(+), 222 deletions(-) delete mode 100644 src/video_core/engines/const_buffer_engine_interface.h (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h deleted file mode 100644 index f46e81bb7..000000000 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include "common/bit_field.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" -#include "video_core/textures/texture.h" - -namespace Tegra::Engines { - -struct SamplerDescriptor { - union { - u32 raw = 0; - BitField<0, 2, Tegra::Shader::TextureType> texture_type; - BitField<2, 3, Tegra::Texture::ComponentType> r_type; - BitField<5, 1, u32> is_array; - BitField<6, 1, u32> is_buffer; - BitField<7, 1, u32> is_shadow; - BitField<8, 3, Tegra::Texture::ComponentType> g_type; - BitField<11, 3, Tegra::Texture::ComponentType> b_type; - BitField<14, 3, Tegra::Texture::ComponentType> a_type; - BitField<17, 7, Tegra::Texture::TextureFormat> format; - }; - - bool operator==(const SamplerDescriptor& rhs) const noexcept { - return raw == rhs.raw; - } - - bool operator!=(const SamplerDescriptor& rhs) const noexcept { - return !operator==(rhs); - } - - static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { - using Tegra::Shader::TextureType; - SamplerDescriptor result; - - result.format.Assign(tic.format.Value()); - result.r_type.Assign(tic.r_type.Value()); - result.g_type.Assign(tic.g_type.Value()); - result.b_type.Assign(tic.b_type.Value()); - result.a_type.Assign(tic.a_type.Value()); - - switch (tic.texture_type.Value()) { - case Tegra::Texture::TextureType::Texture1D: - result.texture_type.Assign(TextureType::Texture1D); - return result; - case Tegra::Texture::TextureType::Texture2D: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::Texture3D: - result.texture_type.Assign(TextureType::Texture3D); - return result; - case Tegra::Texture::TextureType::TextureCubemap: - result.texture_type.Assign(TextureType::TextureCube); - return result; - case Tegra::Texture::TextureType::Texture1DArray: - result.texture_type.Assign(TextureType::Texture1D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DArray: - result.texture_type.Assign(TextureType::Texture2D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture1DBuffer: - result.texture_type.Assign(TextureType::Texture1D); - result.is_buffer.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DNoMipmap: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::TextureCubeArray: - result.texture_type.Assign(TextureType::TextureCube); - result.is_array.Assign(1); - return result; - default: - result.texture_type.Assign(TextureType::Texture2D); - return result; - } - } -}; -static_assert(std::is_trivially_copyable_v); - -class ConstBufferEngineInterface { -public: - virtual ~ConstBufferEngineInterface() = default; - virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; - virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; - virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const = 0; - virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; - virtual u32 GetBoundBuffer() const = 0; - - virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; - virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; -}; - -} // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a9b75091e..cae93c470 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -57,53 +57,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& buffer = launch_description.const_buffer_config[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); - return result; -} - -SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; - return AccessSampler(memory_manager.Read(tex_info_address)); -} - -SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); - - const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; - LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); - - rasterizer->DispatchCompute(code_addr); + rasterizer->DispatchCompute(); } Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7c40cba38..0d7683c2d 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,7 +10,6 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/engines/shader_type.h" @@ -40,7 +39,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { +class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); ~KeplerCompute(); @@ -209,23 +208,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - private: void ProcessLaunch(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index aab6b8f7a..103a51fd0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -670,42 +670,4 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer->Clear(); } -u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader_stage = state.shader_stages[static_cast(stage)]; - const auto& buffer = shader_stage.const_buffers[const_buffer]; - return memory_manager.Read(buffer.address + offset); -} - -SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader = state.shader_stages[static_cast(stage)]; - const auto& tex_info_buffer = shader.const_buffers[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.address + offset; - return AccessSampler(memory_manager.Read(tex_info_address)); -} - -SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 335383955..cbf94412b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -17,7 +17,6 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" @@ -49,7 +48,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { +class Maxwell3D final : public EngineInterface { public: explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); ~Maxwell3D(); @@ -1424,23 +1423,6 @@ public: void FlushMMEInlineDraw(); - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - bool ShouldExecute() const { return execute_on; } -- cgit v1.2.3 From 85cce78583bc2232428a8fb39e43182877c8d5ad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Feb 2021 00:59:28 -0300 Subject: shader: Primitive Vulkan integration --- src/shader_recompiler/CMakeLists.txt | 13 +- .../backend/spirv/emit_context.cpp | 2 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 117 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 419 ++-- .../spirv/emit_spirv_bitwise_conversion.cpp | 24 +- .../backend/spirv/emit_spirv_composite.cpp | 48 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 42 +- .../backend/spirv/emit_spirv_control_flow.cpp | 10 +- .../backend/spirv/emit_spirv_floating_point.cpp | 92 +- .../backend/spirv/emit_spirv_integer.cpp | 60 +- .../backend/spirv/emit_spirv_logical.cpp | 40 +- .../backend/spirv/emit_spirv_memory.cpp | 56 +- .../backend/spirv/emit_spirv_select.cpp | 8 +- .../backend/spirv/emit_spirv_undefined.cpp | 10 +- src/shader_recompiler/environment.h | 6 +- src/shader_recompiler/file_environment.cpp | 6 +- src/shader_recompiler/file_environment.h | 4 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 + src/shader_recompiler/frontend/ir/post_order.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 + .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/move_register.cpp | 35 +- .../maxwell/translate/impl/not_implemented.cpp | 4 - src/shader_recompiler/main.cpp | 2 +- src/shader_recompiler/profile.h | 13 + src/shader_recompiler/recompiler.cpp | 27 + src/shader_recompiler/recompiler.h | 18 + src/video_core/CMakeLists.txt | 6 +- src/video_core/engines/kepler_compute.h | 1 - src/video_core/engines/shader_bytecode.h | 2298 -------------------- src/video_core/engines/shader_header.h | 158 -- .../renderer_vulkan/vk_compute_pipeline.cpp | 140 +- .../renderer_vulkan/vk_compute_pipeline.h | 43 +- .../renderer_vulkan/vk_descriptor_pool.cpp | 6 +- .../renderer_vulkan/vk_descriptor_pool.h | 10 +- src/video_core/renderer_vulkan/vk_pipeline.h | 36 + .../renderer_vulkan/vk_pipeline_cache.cpp | 190 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 23 +- src/video_core/renderer_vulkan/vk_rasterizer.h | 3 - .../renderer_vulkan/vk_resource_pool.cpp | 12 +- src/video_core/renderer_vulkan/vk_resource_pool.h | 12 +- 43 files changed, 1003 insertions(+), 3036 deletions(-) create mode 100644 src/shader_recompiler/profile.h create mode 100644 src/shader_recompiler/recompiler.cpp create mode 100644 src/shader_recompiler/recompiler.h delete mode 100644 src/video_core/engines/shader_bytecode.h delete mode 100644 src/video_core/engines/shader_header.h create mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h (limited to 'src/video_core/engines') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 84be94a8d..b56bdd3d9 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(shader_recompiler +add_library(shader_recompiler STATIC backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp @@ -85,13 +85,19 @@ add_executable(shader_recompiler ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp ir_opt/verification_pass.cpp - main.cpp object_pool.h + profile.h + recompiler.cpp + recompiler.h shader_info.h ) -target_include_directories(video_core PRIVATE sirit) +target_include_directories(shader_recompiler PRIVATE sirit) target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) +target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit) + +add_executable(shader_util main.cpp) +target_link_libraries(shader_util PRIVATE shader_recompiler) if (MSVC) target_compile_options(shader_recompiler PRIVATE @@ -121,3 +127,4 @@ else() endif() create_target_directory_groups(shader_recompiler) +create_target_directory_groups(shader_util) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 1c985aff8..770067d98 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -115,6 +115,7 @@ void EmitContext::DefineConstantBuffers(const Info& info) { for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("c{}", desc.index)); std::fill_n(cbufs.data() + desc.index, desc.count, id); binding += desc.count; @@ -143,6 +144,7 @@ void EmitContext::DefineStorageBuffers(const Info& info) { for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("ssbo{}", binding)); std::fill_n(ssbos.data() + binding, desc.count, id); binding += desc.count; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 55018332e..d59718435 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -2,8 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include +#include #include +#include +#include #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/basic_block.h" @@ -14,10 +17,10 @@ namespace Shader::Backend::SPIRV { namespace { template -struct FuncTraits : FuncTraits {}; +struct FuncTraits : FuncTraits {}; -template -struct FuncTraits { +template +struct FuncTraits { using ReturnType = ReturnType_; static constexpr size_t NUM_ARGS = sizeof...(Args); @@ -26,15 +29,15 @@ struct FuncTraits { using ArgType = std::tuple_element_t>; }; -template -void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) { +template +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { const Id forward_id{inst->Definition()}; const bool has_forward_id{Sirit::ValidId(forward_id)}; Id current_id{}; if (has_forward_id) { current_id = ctx.ExchangeCurrentId(forward_id); } - const Id new_id{(emit.*method)(ctx, std::forward(args)...)}; + const Id new_id{func(ctx, std::forward(args)...)}; if (has_forward_id) { ctx.ExchangeCurrentId(current_id); } else { @@ -55,42 +58,62 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { } } -template -void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, std::index_sequence) { - using Traits = FuncTraits; +template +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { + using Traits = FuncTraits; if constexpr (std::is_same_v) { if constexpr (is_first_arg_inst) { - SetDefinition(emit, ctx, inst, inst, - Arg>(ctx, inst->Arg(I))...); + SetDefinition(ctx, inst, inst, Arg>(ctx, inst->Arg(I))...); } else { - SetDefinition(emit, ctx, inst, - Arg>(ctx, inst->Arg(I))...); + SetDefinition(ctx, inst, Arg>(ctx, inst->Arg(I))...); } } else { if constexpr (is_first_arg_inst) { - (emit.*method)(ctx, inst, Arg>(ctx, inst->Arg(I))...); + func(ctx, inst, Arg>(ctx, inst->Arg(I))...); } else { - (emit.*method)(ctx, Arg>(ctx, inst->Arg(I))...); + func(ctx, Arg>(ctx, inst->Arg(I))...); } } } -template -void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { - using Traits = FuncTraits; +template +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits; static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); if constexpr (Traits::NUM_ARGS == 1) { - Invoke(emit, ctx, inst, std::make_index_sequence<0>{}); + Invoke(ctx, inst, std::make_index_sequence<0>{}); } else { using FirstArgType = typename Traits::template ArgType<1>; static constexpr bool is_first_arg_inst = std::is_same_v; using Indices = std::make_index_sequence; - Invoke(emit, ctx, inst, Indices{}); + Invoke(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->Opcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->Opcode()); +} + +Id TypeId(const EmitContext& ctx, IR::Type type) { + switch (type) { + case IR::Type::U1: + return ctx.U1; + case IR::Type::U32: + return ctx.U32[1]; + default: + throw NotImplementedException("Phi node type {}", type); } } } // Anonymous namespace -EmitSPIRV::EmitSPIRV(IR::Program& program) { +std::vector EmitSPIRV(Environment& env, IR::Program& program) { EmitContext ctx{program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) @@ -112,43 +135,17 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) { if (program.info.uses_local_invocation_id) { interfaces.push_back(ctx.local_invocation_id); } - const std::span interfaces_span(interfaces.data(), interfaces.size()); - ctx.AddEntryPoint(spv::ExecutionModel::Fragment, func, "main", interfaces_span); - ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); - - std::vector result{ctx.Assemble()}; - std::FILE* file{std::fopen("D:\\shader.spv", "wb")}; - std::fwrite(result.data(), sizeof(u32), result.size(), file); - std::fclose(file); - std::system("spirv-dis D:\\shader.spv") == 0 && - std::system("spirv-val --uniform-buffer-standard-layout D:\\shader.spv") == 0 && - std::system("spirv-cross -V D:\\shader.spv") == 0; -} + ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span); -void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { - switch (inst->Opcode()) { -#define OPCODE(name, result_type, ...) \ - case IR::Opcode::name: \ - return Invoke<&EmitSPIRV::Emit##name>(*this, ctx, inst); -#include "shader_recompiler/frontend/ir/opcodes.inc" -#undef OPCODE - } - throw LogicError("Invalid opcode {}", inst->Opcode()); -} + const std::array workgroup_size{env.WorkgroupSize()}; + ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], + workgroup_size[2]); -static Id TypeId(const EmitContext& ctx, IR::Type type) { - switch (type) { - case IR::Type::U1: - return ctx.U1; - case IR::Type::U32: - return ctx.U32[1]; - default: - throw NotImplementedException("Phi node type {}", type); - } + return ctx.Assemble(); } -Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { +Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { const size_t num_args{inst->NumArgs()}; boost::container::small_vector operands; operands.reserve(num_args * 2); @@ -178,25 +175,25 @@ Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); } -void EmitSPIRV::EmitVoid(EmitContext&) {} +void EmitVoid(EmitContext&) {} -Id EmitSPIRV::EmitIdentity(EmitContext& ctx, const IR::Value& value) { +Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { return ctx.Def(value); } -void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { +void EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetSignFromOp(EmitContext&) { +void EmitGetSignFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetCarryFromOp(EmitContext&) { +void EmitGetCarryFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetOverflowFromOp(EmitContext&) { +void EmitGetOverflowFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 8bde82613..5813f51ff 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -8,223 +8,218 @@ #include "common/common_types.h" #include "shader_recompiler/backend/spirv/emit_context.h" +#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { -class EmitSPIRV { -public: - explicit EmitSPIRV(IR::Program& program); +[[nodiscard]] std::vector EmitSPIRV(Environment& env, IR::Program& program); -private: - void EmitInst(EmitContext& ctx, IR::Inst* inst); - - // Microinstruction emitters - Id EmitPhi(EmitContext& ctx, IR::Inst* inst); - void EmitVoid(EmitContext& ctx); - Id EmitIdentity(EmitContext& ctx, const IR::Value& value); - void EmitBranch(EmitContext& ctx, IR::Block* label); - void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label); - void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); - void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); - void EmitReturn(EmitContext& ctx); - void EmitGetRegister(EmitContext& ctx); - void EmitSetRegister(EmitContext& ctx); - void EmitGetPred(EmitContext& ctx); - void EmitSetPred(EmitContext& ctx); - void EmitSetGotoVariable(EmitContext& ctx); - void EmitGetGotoVariable(EmitContext& ctx); - Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); - void EmitGetAttribute(EmitContext& ctx); - void EmitSetAttribute(EmitContext& ctx); - void EmitGetAttributeIndexed(EmitContext& ctx); - void EmitSetAttributeIndexed(EmitContext& ctx); - void EmitGetZFlag(EmitContext& ctx); - void EmitGetSFlag(EmitContext& ctx); - void EmitGetCFlag(EmitContext& ctx); - void EmitGetOFlag(EmitContext& ctx); - void EmitSetZFlag(EmitContext& ctx); - void EmitSetSFlag(EmitContext& ctx); - void EmitSetCFlag(EmitContext& ctx); - void EmitSetOFlag(EmitContext& ctx); - Id EmitWorkgroupId(EmitContext& ctx); - Id EmitLocalInvocationId(EmitContext& ctx); - Id EmitUndefU1(EmitContext& ctx); - Id EmitUndefU8(EmitContext& ctx); - Id EmitUndefU16(EmitContext& ctx); - Id EmitUndefU32(EmitContext& ctx); - Id EmitUndefU64(EmitContext& ctx); - void EmitLoadGlobalU8(EmitContext& ctx); - void EmitLoadGlobalS8(EmitContext& ctx); - void EmitLoadGlobalU16(EmitContext& ctx); - void EmitLoadGlobalS16(EmitContext& ctx); - void EmitLoadGlobal32(EmitContext& ctx); - void EmitLoadGlobal64(EmitContext& ctx); - void EmitLoadGlobal128(EmitContext& ctx); - void EmitWriteGlobalU8(EmitContext& ctx); - void EmitWriteGlobalS8(EmitContext& ctx); - void EmitWriteGlobalU16(EmitContext& ctx); - void EmitWriteGlobalS16(EmitContext& ctx); - void EmitWriteGlobal32(EmitContext& ctx); - void EmitWriteGlobal64(EmitContext& ctx); - void EmitWriteGlobal128(EmitContext& ctx); - void EmitLoadStorageU8(EmitContext& ctx); - void EmitLoadStorageS8(EmitContext& ctx); - void EmitLoadStorageU16(EmitContext& ctx); - void EmitLoadStorageS16(EmitContext& ctx); - Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); - void EmitLoadStorage64(EmitContext& ctx); - void EmitLoadStorage128(EmitContext& ctx); - void EmitWriteStorageU8(EmitContext& ctx); - void EmitWriteStorageS8(EmitContext& ctx); - void EmitWriteStorageU16(EmitContext& ctx); - void EmitWriteStorageS16(EmitContext& ctx); - void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); - void EmitWriteStorage64(EmitContext& ctx); - void EmitWriteStorage128(EmitContext& ctx); - void EmitCompositeConstructU32x2(EmitContext& ctx); - void EmitCompositeConstructU32x3(EmitContext& ctx); - void EmitCompositeConstructU32x4(EmitContext& ctx); - void EmitCompositeExtractU32x2(EmitContext& ctx); - Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); - void EmitCompositeExtractU32x4(EmitContext& ctx); - void EmitCompositeConstructF16x2(EmitContext& ctx); - void EmitCompositeConstructF16x3(EmitContext& ctx); - void EmitCompositeConstructF16x4(EmitContext& ctx); - void EmitCompositeExtractF16x2(EmitContext& ctx); - void EmitCompositeExtractF16x3(EmitContext& ctx); - void EmitCompositeExtractF16x4(EmitContext& ctx); - void EmitCompositeConstructF32x2(EmitContext& ctx); - void EmitCompositeConstructF32x3(EmitContext& ctx); - void EmitCompositeConstructF32x4(EmitContext& ctx); - void EmitCompositeExtractF32x2(EmitContext& ctx); - void EmitCompositeExtractF32x3(EmitContext& ctx); - void EmitCompositeExtractF32x4(EmitContext& ctx); - void EmitCompositeConstructF64x2(EmitContext& ctx); - void EmitCompositeConstructF64x3(EmitContext& ctx); - void EmitCompositeConstructF64x4(EmitContext& ctx); - void EmitCompositeExtractF64x2(EmitContext& ctx); - void EmitCompositeExtractF64x3(EmitContext& ctx); - void EmitCompositeExtractF64x4(EmitContext& ctx); - void EmitSelect8(EmitContext& ctx); - void EmitSelect16(EmitContext& ctx); - void EmitSelect32(EmitContext& ctx); - void EmitSelect64(EmitContext& ctx); - void EmitBitCastU16F16(EmitContext& ctx); - Id EmitBitCastU32F32(EmitContext& ctx, Id value); - void EmitBitCastU64F64(EmitContext& ctx); - void EmitBitCastF16U16(EmitContext& ctx); - Id EmitBitCastF32U32(EmitContext& ctx, Id value); - void EmitBitCastF64U64(EmitContext& ctx); - void EmitPackUint2x32(EmitContext& ctx); - void EmitUnpackUint2x32(EmitContext& ctx); - void EmitPackFloat2x16(EmitContext& ctx); - void EmitUnpackFloat2x16(EmitContext& ctx); - void EmitPackDouble2x32(EmitContext& ctx); - void EmitUnpackDouble2x32(EmitContext& ctx); - void EmitGetZeroFromOp(EmitContext& ctx); - void EmitGetSignFromOp(EmitContext& ctx); - void EmitGetCarryFromOp(EmitContext& ctx); - void EmitGetOverflowFromOp(EmitContext& ctx); - void EmitFPAbs16(EmitContext& ctx); - void EmitFPAbs32(EmitContext& ctx); - void EmitFPAbs64(EmitContext& ctx); - Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - void EmitFPMax32(EmitContext& ctx); - void EmitFPMax64(EmitContext& ctx); - void EmitFPMin32(EmitContext& ctx); - void EmitFPMin64(EmitContext& ctx); - Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - void EmitFPNeg16(EmitContext& ctx); - void EmitFPNeg32(EmitContext& ctx); - void EmitFPNeg64(EmitContext& ctx); - void EmitFPRecip32(EmitContext& ctx); - void EmitFPRecip64(EmitContext& ctx); - void EmitFPRecipSqrt32(EmitContext& ctx); - void EmitFPRecipSqrt64(EmitContext& ctx); - void EmitFPSqrt(EmitContext& ctx); - void EmitFPSin(EmitContext& ctx); - void EmitFPSinNotReduced(EmitContext& ctx); - void EmitFPExp2(EmitContext& ctx); - void EmitFPExp2NotReduced(EmitContext& ctx); - void EmitFPCos(EmitContext& ctx); - void EmitFPCosNotReduced(EmitContext& ctx); - void EmitFPLog2(EmitContext& ctx); - void EmitFPSaturate16(EmitContext& ctx); - void EmitFPSaturate32(EmitContext& ctx); - void EmitFPSaturate64(EmitContext& ctx); - void EmitFPRoundEven16(EmitContext& ctx); - void EmitFPRoundEven32(EmitContext& ctx); - void EmitFPRoundEven64(EmitContext& ctx); - void EmitFPFloor16(EmitContext& ctx); - void EmitFPFloor32(EmitContext& ctx); - void EmitFPFloor64(EmitContext& ctx); - void EmitFPCeil16(EmitContext& ctx); - void EmitFPCeil32(EmitContext& ctx); - void EmitFPCeil64(EmitContext& ctx); - void EmitFPTrunc16(EmitContext& ctx); - void EmitFPTrunc32(EmitContext& ctx); - void EmitFPTrunc64(EmitContext& ctx); - Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - void EmitIAdd64(EmitContext& ctx); - Id EmitISub32(EmitContext& ctx, Id a, Id b); - void EmitISub64(EmitContext& ctx); - Id EmitIMul32(EmitContext& ctx, Id a, Id b); - void EmitINeg32(EmitContext& ctx); - void EmitIAbs32(EmitContext& ctx); - Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); - void EmitShiftRightLogical32(EmitContext& ctx); - void EmitShiftRightArithmetic32(EmitContext& ctx); - void EmitBitwiseAnd32(EmitContext& ctx); - void EmitBitwiseOr32(EmitContext& ctx); - void EmitBitwiseXor32(EmitContext& ctx); - void EmitBitFieldInsert(EmitContext& ctx); - void EmitBitFieldSExtract(EmitContext& ctx); - Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); - Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); - void EmitULessThan(EmitContext& ctx); - void EmitIEqual(EmitContext& ctx); - void EmitSLessThanEqual(EmitContext& ctx); - void EmitULessThanEqual(EmitContext& ctx); - Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); - void EmitUGreaterThan(EmitContext& ctx); - void EmitINotEqual(EmitContext& ctx); - void EmitSGreaterThanEqual(EmitContext& ctx); - Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); - void EmitLogicalOr(EmitContext& ctx); - void EmitLogicalAnd(EmitContext& ctx); - void EmitLogicalXor(EmitContext& ctx); - void EmitLogicalNot(EmitContext& ctx); - void EmitConvertS16F16(EmitContext& ctx); - void EmitConvertS16F32(EmitContext& ctx); - void EmitConvertS16F64(EmitContext& ctx); - void EmitConvertS32F16(EmitContext& ctx); - void EmitConvertS32F32(EmitContext& ctx); - void EmitConvertS32F64(EmitContext& ctx); - void EmitConvertS64F16(EmitContext& ctx); - void EmitConvertS64F32(EmitContext& ctx); - void EmitConvertS64F64(EmitContext& ctx); - void EmitConvertU16F16(EmitContext& ctx); - void EmitConvertU16F32(EmitContext& ctx); - void EmitConvertU16F64(EmitContext& ctx); - void EmitConvertU32F16(EmitContext& ctx); - void EmitConvertU32F32(EmitContext& ctx); - void EmitConvertU32F64(EmitContext& ctx); - void EmitConvertU64F16(EmitContext& ctx); - void EmitConvertU64F32(EmitContext& ctx); - void EmitConvertU64F64(EmitContext& ctx); - void EmitConvertU64U32(EmitContext& ctx); - void EmitConvertU32U64(EmitContext& ctx); -}; +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitBranch(EmitContext& ctx, IR::Block* label); +void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, + IR::Block* false_label); +void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); +void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); +void EmitReturn(EmitContext& ctx); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx); +void EmitSetAttribute(EmitContext& ctx); +void EmitGetAttributeIndexed(EmitContext& ctx); +void EmitSetAttributeIndexed(EmitContext& ctx); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx); +void EmitLoadGlobal64(EmitContext& ctx); +void EmitLoadGlobal128(EmitContext& ctx); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx); +void EmitWriteGlobal64(EmitContext& ctx); +void EmitWriteGlobal128(EmitContext& ctx); +void EmitLoadStorageU8(EmitContext& ctx); +void EmitLoadStorageS8(EmitContext& ctx); +void EmitLoadStorageU16(EmitContext& ctx); +void EmitLoadStorageS16(EmitContext& ctx); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx); +void EmitLoadStorage128(EmitContext& ctx); +void EmitWriteStorageU8(EmitContext& ctx); +void EmitWriteStorageS8(EmitContext& ctx); +void EmitWriteStorageU16(EmitContext& ctx); +void EmitWriteStorageS16(EmitContext& ctx); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx); +void EmitWriteStorage128(EmitContext& ctx); +void EmitCompositeConstructU32x2(EmitContext& ctx); +void EmitCompositeConstructU32x3(EmitContext& ctx); +void EmitCompositeConstructU32x4(EmitContext& ctx); +void EmitCompositeExtractU32x2(EmitContext& ctx); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx); +void EmitCompositeConstructF16x2(EmitContext& ctx); +void EmitCompositeConstructF16x3(EmitContext& ctx); +void EmitCompositeConstructF16x4(EmitContext& ctx); +void EmitCompositeExtractF16x2(EmitContext& ctx); +void EmitCompositeExtractF16x3(EmitContext& ctx); +void EmitCompositeExtractF16x4(EmitContext& ctx); +void EmitCompositeConstructF32x2(EmitContext& ctx); +void EmitCompositeConstructF32x3(EmitContext& ctx); +void EmitCompositeConstructF32x4(EmitContext& ctx); +void EmitCompositeExtractF32x2(EmitContext& ctx); +void EmitCompositeExtractF32x3(EmitContext& ctx); +void EmitCompositeExtractF32x4(EmitContext& ctx); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitSelect8(EmitContext& ctx); +void EmitSelect16(EmitContext& ctx); +void EmitSelect32(EmitContext& ctx); +void EmitSelect64(EmitContext& ctx); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& ctx); +void EmitPackUint2x32(EmitContext& ctx); +void EmitUnpackUint2x32(EmitContext& ctx); +void EmitPackFloat2x16(EmitContext& ctx); +void EmitUnpackFloat2x16(EmitContext& ctx); +void EmitPackDouble2x32(EmitContext& ctx); +void EmitUnpackDouble2x32(EmitContext& ctx); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx); +void EmitFPAbs32(EmitContext& ctx); +void EmitFPAbs64(EmitContext& ctx); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +void EmitFPMax32(EmitContext& ctx); +void EmitFPMax64(EmitContext& ctx); +void EmitFPMin32(EmitContext& ctx); +void EmitFPMin64(EmitContext& ctx); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +void EmitFPNeg16(EmitContext& ctx); +void EmitFPNeg32(EmitContext& ctx); +void EmitFPNeg64(EmitContext& ctx); +void EmitFPRecip32(EmitContext& ctx); +void EmitFPRecip64(EmitContext& ctx); +void EmitFPRecipSqrt32(EmitContext& ctx); +void EmitFPRecipSqrt64(EmitContext& ctx); +void EmitFPSqrt(EmitContext& ctx); +void EmitFPSin(EmitContext& ctx); +void EmitFPSinNotReduced(EmitContext& ctx); +void EmitFPExp2(EmitContext& ctx); +void EmitFPExp2NotReduced(EmitContext& ctx); +void EmitFPCos(EmitContext& ctx); +void EmitFPCosNotReduced(EmitContext& ctx); +void EmitFPLog2(EmitContext& ctx); +void EmitFPSaturate16(EmitContext& ctx); +void EmitFPSaturate32(EmitContext& ctx); +void EmitFPSaturate64(EmitContext& ctx); +void EmitFPRoundEven16(EmitContext& ctx); +void EmitFPRoundEven32(EmitContext& ctx); +void EmitFPRoundEven64(EmitContext& ctx); +void EmitFPFloor16(EmitContext& ctx); +void EmitFPFloor32(EmitContext& ctx); +void EmitFPFloor64(EmitContext& ctx); +void EmitFPCeil16(EmitContext& ctx); +void EmitFPCeil32(EmitContext& ctx); +void EmitFPCeil64(EmitContext& ctx); +void EmitFPTrunc16(EmitContext& ctx); +void EmitFPTrunc32(EmitContext& ctx); +void EmitFPTrunc64(EmitContext& ctx); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +void EmitIAdd64(EmitContext& ctx); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +void EmitISub64(EmitContext& ctx); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +void EmitINeg32(EmitContext& ctx); +void EmitIAbs32(EmitContext& ctx); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +void EmitShiftRightLogical32(EmitContext& ctx); +void EmitShiftRightArithmetic32(EmitContext& ctx); +void EmitBitwiseAnd32(EmitContext& ctx); +void EmitBitwiseOr32(EmitContext& ctx); +void EmitBitwiseXor32(EmitContext& ctx); +void EmitBitFieldInsert(EmitContext& ctx); +void EmitBitFieldSExtract(EmitContext& ctx); +Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +void EmitULessThan(EmitContext& ctx); +void EmitIEqual(EmitContext& ctx); +void EmitSLessThanEqual(EmitContext& ctx); +void EmitULessThanEqual(EmitContext& ctx); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +void EmitUGreaterThan(EmitContext& ctx); +void EmitINotEqual(EmitContext& ctx); +void EmitSGreaterThanEqual(EmitContext& ctx); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +void EmitLogicalOr(EmitContext& ctx); +void EmitLogicalAnd(EmitContext& ctx); +void EmitLogicalXor(EmitContext& ctx); +void EmitLogicalNot(EmitContext& ctx); +void EmitConvertS16F16(EmitContext& ctx); +void EmitConvertS16F32(EmitContext& ctx); +void EmitConvertS16F64(EmitContext& ctx); +void EmitConvertS32F16(EmitContext& ctx); +void EmitConvertS32F32(EmitContext& ctx); +void EmitConvertS32F64(EmitContext& ctx); +void EmitConvertS64F16(EmitContext& ctx); +void EmitConvertS64F32(EmitContext& ctx); +void EmitConvertS64F64(EmitContext& ctx); +void EmitConvertU16F16(EmitContext& ctx); +void EmitConvertU16F32(EmitContext& ctx); +void EmitConvertU16F64(EmitContext& ctx); +void EmitConvertU32F16(EmitContext& ctx); +void EmitConvertU32F32(EmitContext& ctx); +void EmitConvertU32F64(EmitContext& ctx); +void EmitConvertU64F16(EmitContext& ctx); +void EmitConvertU64F32(EmitContext& ctx); +void EmitConvertU64F64(EmitContext& ctx); +void EmitConvertU64U32(EmitContext& ctx); +void EmitConvertU32U64(EmitContext& ctx); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index af82df99c..49c200498 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -6,51 +6,51 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitBitCastU16F16(EmitContext&) { +void EmitBitCastU16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitCastU32F32(EmitContext& ctx, Id value) { +Id EmitBitCastU32F32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.U32[1], value); } -void EmitSPIRV::EmitBitCastU64F64(EmitContext&) { +void EmitBitCastU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitCastF16U16(EmitContext&) { +void EmitBitCastF16U16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitCastF32U32(EmitContext& ctx, Id value) { +Id EmitBitCastF32U32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.F32[1], value); } -void EmitSPIRV::EmitBitCastF64U64(EmitContext&) { +void EmitBitCastF64U64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackUint2x32(EmitContext&) { +void EmitPackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackUint2x32(EmitContext&) { +void EmitUnpackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackFloat2x16(EmitContext&) { +void EmitPackFloat2x16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackFloat2x16(EmitContext&) { +void EmitUnpackFloat2x16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackDouble2x32(EmitContext&) { +void EmitPackDouble2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackDouble2x32(EmitContext&) { +void EmitUnpackDouble2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index a7374c89d..348e4796d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,99 +6,99 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitCompositeConstructU32x2(EmitContext&) { +void EmitCompositeConstructU32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructU32x3(EmitContext&) { +void EmitCompositeConstructU32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructU32x4(EmitContext&) { +void EmitCompositeConstructU32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractU32x2(EmitContext&) { +void EmitCompositeExtractU32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { return ctx.OpCompositeExtract(ctx.U32[1], vector, index); } -void EmitSPIRV::EmitCompositeExtractU32x4(EmitContext&) { +void EmitCompositeExtractU32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x2(EmitContext&) { +void EmitCompositeConstructF16x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x3(EmitContext&) { +void EmitCompositeConstructF16x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x4(EmitContext&) { +void EmitCompositeConstructF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x2(EmitContext&) { +void EmitCompositeExtractF16x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x3(EmitContext&) { +void EmitCompositeExtractF16x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x4(EmitContext&) { +void EmitCompositeExtractF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x2(EmitContext&) { +void EmitCompositeConstructF32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x3(EmitContext&) { +void EmitCompositeConstructF32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x4(EmitContext&) { +void EmitCompositeConstructF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x2(EmitContext&) { +void EmitCompositeExtractF32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x3(EmitContext&) { +void EmitCompositeExtractF32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x4(EmitContext&) { +void EmitCompositeExtractF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x2(EmitContext&) { +void EmitCompositeConstructF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x3(EmitContext&) { +void EmitCompositeConstructF64x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x4(EmitContext&) { +void EmitCompositeConstructF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x2(EmitContext&) { +void EmitCompositeExtractF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x3(EmitContext&) { +void EmitCompositeExtractF64x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x4(EmitContext&) { +void EmitCompositeExtractF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f4c9970eb..eb9c01c5a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,31 +6,31 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitGetRegister(EmitContext&) { +void EmitGetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetRegister(EmitContext&) { +void EmitSetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetPred(EmitContext&) { +void EmitGetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetPred(EmitContext&) { +void EmitSetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetGotoVariable(EmitContext&) { +void EmitSetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetGotoVariable(EmitContext&) { +void EmitGetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); } @@ -43,59 +43,59 @@ Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR:: return ctx.OpLoad(ctx.U32[1], access_chain); } -void EmitSPIRV::EmitGetAttribute(EmitContext&) { +void EmitGetAttribute(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetAttribute(EmitContext&) { +void EmitSetAttribute(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetAttributeIndexed(EmitContext&) { +void EmitGetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetAttributeIndexed(EmitContext&) { +void EmitSetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetZFlag(EmitContext&) { +void EmitGetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetSFlag(EmitContext&) { +void EmitGetSFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetCFlag(EmitContext&) { +void EmitGetCFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetOFlag(EmitContext&) { +void EmitGetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetZFlag(EmitContext&) { +void EmitSetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetSFlag(EmitContext&) { +void EmitSetSFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetCFlag(EmitContext&) { +void EmitSetCFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetOFlag(EmitContext&) { +void EmitSetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitWorkgroupId(EmitContext& ctx) { +Id EmitWorkgroupId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); } -Id EmitSPIRV::EmitLocalInvocationId(EmitContext& ctx) { +Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 549c1907a..6c4199664 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -6,25 +6,25 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Block* label) { +void EmitBranch(EmitContext& ctx, IR::Block* label) { ctx.OpBranch(label->Definition()); } -void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, +void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, IR::Block* false_label) { ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); } -void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { +void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { ctx.OpLoopMerge(merge_label->Definition(), continue_label->Definition(), spv::LoopControlMask::MaskNone); } -void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { +void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { ctx.OpSelectionMerge(merge_label->Definition(), spv::SelectionControlMask::MaskNone); } -void EmitSPIRV::EmitReturn(EmitContext& ctx) { +void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index c9bc121f8..d24fbb353 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -33,187 +33,187 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { } // Anonymous namespace -void EmitSPIRV::EmitFPAbs16(EmitContext&) { +void EmitFPAbs16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPAbs32(EmitContext&) { +void EmitFPAbs32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPAbs64(EmitContext&) { +void EmitFPAbs64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); } -Id EmitSPIRV::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); } -Id EmitSPIRV::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); } -Id EmitSPIRV::EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); } -Id EmitSPIRV::EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); } -Id EmitSPIRV::EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); } -void EmitSPIRV::EmitFPMax32(EmitContext&) { +void EmitFPMax32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMax64(EmitContext&) { +void EmitFPMax64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMin32(EmitContext&) { +void EmitFPMin32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMin64(EmitContext&) { +void EmitFPMin64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); } -Id EmitSPIRV::EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); } -Id EmitSPIRV::EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } -void EmitSPIRV::EmitFPNeg16(EmitContext&) { +void EmitFPNeg16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPNeg32(EmitContext&) { +void EmitFPNeg32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPNeg64(EmitContext&) { +void EmitFPNeg64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecip32(EmitContext&) { +void EmitFPRecip32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecip64(EmitContext&) { +void EmitFPRecip64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecipSqrt32(EmitContext&) { +void EmitFPRecipSqrt32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecipSqrt64(EmitContext&) { +void EmitFPRecipSqrt64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSqrt(EmitContext&) { +void EmitFPSqrt(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSin(EmitContext&) { +void EmitFPSin(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSinNotReduced(EmitContext&) { +void EmitFPSinNotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPExp2(EmitContext&) { +void EmitFPExp2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPExp2NotReduced(EmitContext&) { +void EmitFPExp2NotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCos(EmitContext&) { +void EmitFPCos(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCosNotReduced(EmitContext&) { +void EmitFPCosNotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPLog2(EmitContext&) { +void EmitFPLog2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate16(EmitContext&) { +void EmitFPSaturate16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate32(EmitContext&) { +void EmitFPSaturate32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate64(EmitContext&) { +void EmitFPSaturate64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven16(EmitContext&) { +void EmitFPRoundEven16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven32(EmitContext&) { +void EmitFPRoundEven32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven64(EmitContext&) { +void EmitFPRoundEven64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor16(EmitContext&) { +void EmitFPFloor16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor32(EmitContext&) { +void EmitFPFloor32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor64(EmitContext&) { +void EmitFPFloor64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil16(EmitContext&) { +void EmitFPCeil16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil32(EmitContext&) { +void EmitFPCeil32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil64(EmitContext&) { +void EmitFPCeil64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc16(EmitContext&) { +void EmitFPTrunc16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc32(EmitContext&) { +void EmitFPTrunc32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc64(EmitContext&) { +void EmitFPTrunc64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 32af94a73..a1d16b81e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -6,126 +6,126 @@ namespace Shader::Backend::SPIRV { -Id EmitSPIRV::EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { if (inst->HasAssociatedPseudoOperation()) { throw NotImplementedException("Pseudo-operations on IAdd32"); } return ctx.OpIAdd(ctx.U32[1], a, b); } -void EmitSPIRV::EmitIAdd64(EmitContext&) { +void EmitIAdd64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitISub32(EmitContext& ctx, Id a, Id b) { +Id EmitISub32(EmitContext& ctx, Id a, Id b) { return ctx.OpISub(ctx.U32[1], a, b); } -void EmitSPIRV::EmitISub64(EmitContext&) { +void EmitISub64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitIMul32(EmitContext& ctx, Id a, Id b) { +Id EmitIMul32(EmitContext& ctx, Id a, Id b) { return ctx.OpIMul(ctx.U32[1], a, b); } -void EmitSPIRV::EmitINeg32(EmitContext&) { +void EmitINeg32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitIAbs32(EmitContext&) { +void EmitIAbs32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } -void EmitSPIRV::EmitShiftRightLogical32(EmitContext&) { +void EmitShiftRightLogical32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitShiftRightArithmetic32(EmitContext&) { +void EmitShiftRightArithmetic32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseAnd32(EmitContext&) { +void EmitBitwiseAnd32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseOr32(EmitContext&) { +void EmitBitwiseOr32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseXor32(EmitContext&) { +void EmitBitwiseXor32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitFieldInsert(EmitContext&) { +void EmitBitFieldInsert(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitFieldSExtract(EmitContext&) { +void EmitBitFieldSExtract(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { +Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { return ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count); } -Id EmitSPIRV::EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitULessThan(EmitContext&) { +void EmitULessThan(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitIEqual(EmitContext&) { +void EmitIEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSLessThanEqual(EmitContext&) { +void EmitSLessThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitULessThanEqual(EmitContext&) { +void EmitULessThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitUGreaterThan(EmitContext&) { +void EmitUGreaterThan(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitINotEqual(EmitContext&) { +void EmitINotEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSGreaterThanEqual(EmitContext&) { +void EmitSGreaterThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitLogicalOr(EmitContext&) { +void EmitLogicalOr(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalAnd(EmitContext&) { +void EmitLogicalAnd(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalXor(EmitContext&) { +void EmitLogicalXor(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalNot(EmitContext&) { +void EmitLogicalNot(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index 7b43c4ed8..ff2f4fb74 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -6,83 +6,83 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitConvertS16F16(EmitContext&) { +void EmitConvertS16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS16F32(EmitContext&) { +void EmitConvertS16F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS16F64(EmitContext&) { +void EmitConvertS16F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F16(EmitContext&) { +void EmitConvertS32F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F32(EmitContext&) { +void EmitConvertS32F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F64(EmitContext&) { +void EmitConvertS32F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F16(EmitContext&) { +void EmitConvertS64F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F32(EmitContext&) { +void EmitConvertS64F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F64(EmitContext&) { +void EmitConvertS64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F16(EmitContext&) { +void EmitConvertU16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F32(EmitContext&) { +void EmitConvertU16F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F64(EmitContext&) { +void EmitConvertU16F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F16(EmitContext&) { +void EmitConvertU32F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F32(EmitContext&) { +void EmitConvertU32F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F64(EmitContext&) { +void EmitConvertU32F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F16(EmitContext&) { +void EmitConvertU64F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F32(EmitContext&) { +void EmitConvertU64F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F64(EmitContext&) { +void EmitConvertU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64U32(EmitContext&) { +void EmitConvertU64U32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32U64(EmitContext&) { +void EmitConvertU32U64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 5769a3c95..77d698ffd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -22,79 +22,79 @@ static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } -void EmitSPIRV::EmitLoadGlobalU8(EmitContext&) { +void EmitLoadGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalS8(EmitContext&) { +void EmitLoadGlobalS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalU16(EmitContext&) { +void EmitLoadGlobalU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalS16(EmitContext&) { +void EmitLoadGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal32(EmitContext&) { +void EmitLoadGlobal32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal64(EmitContext&) { +void EmitLoadGlobal64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal128(EmitContext&) { +void EmitLoadGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalU8(EmitContext&) { +void EmitWriteGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalS8(EmitContext&) { +void EmitWriteGlobalS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalU16(EmitContext&) { +void EmitWriteGlobalU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalS16(EmitContext&) { +void EmitWriteGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal32(EmitContext&) { +void EmitWriteGlobal32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal64(EmitContext&) { +void EmitWriteGlobal64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal128(EmitContext&) { +void EmitWriteGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageU8(EmitContext&) { +void EmitLoadStorageU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageS8(EmitContext&) { +void EmitLoadStorageS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageU16(EmitContext&) { +void EmitLoadStorageU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageS16(EmitContext&) { +void EmitLoadStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); @@ -105,31 +105,31 @@ Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, return ctx.OpLoad(ctx.U32[1], pointer); } -void EmitSPIRV::EmitLoadStorage64(EmitContext&) { +void EmitLoadStorage64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorage128(EmitContext&) { +void EmitLoadStorage128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageU8(EmitContext&) { +void EmitWriteStorageU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageS8(EmitContext&) { +void EmitWriteStorageS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageU16(EmitContext&) { +void EmitWriteStorageU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageS16(EmitContext&) { +void EmitWriteStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); @@ -140,11 +140,11 @@ void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ctx.OpStore(pointer, value); } -void EmitSPIRV::EmitWriteStorage64(EmitContext&) { +void EmitWriteStorage64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage128(EmitContext&) { +void EmitWriteStorage128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 40a856f72..8d5062724 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -6,19 +6,19 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitSelect8(EmitContext&) { +void EmitSelect8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect16(EmitContext&) { +void EmitSelect16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect32(EmitContext&) { +void EmitSelect32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect64(EmitContext&) { +void EmitSelect64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index c1ed8f281..19b06dbe4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -6,23 +6,23 @@ namespace Shader::Backend::SPIRV { -Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { +Id EmitUndefU1(EmitContext& ctx) { return ctx.OpUndef(ctx.U1); } -Id EmitSPIRV::EmitUndefU8(EmitContext&) { +Id EmitUndefU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUndefU16(EmitContext&) { +Id EmitUndefU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUndefU32(EmitContext& ctx) { +Id EmitUndefU32(EmitContext& ctx) { return ctx.OpUndef(ctx.U32[1]); } -Id EmitSPIRV::EmitUndefU64(EmitContext&) { +Id EmitUndefU64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index f6230e817..0ba681fb9 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -1,5 +1,7 @@ #pragma once +#include + #include "common/common_types.h" namespace Shader { @@ -8,7 +10,9 @@ class Environment { public: virtual ~Environment() = default; - [[nodiscard]] virtual u64 ReadInstruction(u32 address) const = 0; + [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + + [[nodiscard]] virtual std::array WorkgroupSize() = 0; }; } // namespace Shader diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index b34bf462b..5127523f9 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp @@ -29,7 +29,7 @@ FileEnvironment::FileEnvironment(const char* path) { FileEnvironment::~FileEnvironment() = default; -u64 FileEnvironment::ReadInstruction(u32 offset) const { +u64 FileEnvironment::ReadInstruction(u32 offset) { if (offset % 8 != 0) { throw InvalidArgument("offset={} is not aligned to 8", offset); } @@ -39,4 +39,8 @@ u64 FileEnvironment::ReadInstruction(u32 offset) const { return data[offset / 8]; } +std::array FileEnvironment::WorkgroupSize() { + return {1, 1, 1}; +} + } // namespace Shader diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index c294bc6fa..b8c4bbadd 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -12,7 +12,9 @@ public: explicit FileEnvironment(const char* path); ~FileEnvironment() override; - u64 ReadInstruction(u32 offset) const override; + u64 ReadInstruction(u32 offset) override; + + std::array WorkgroupSize() override; private: std::vector data; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 5ae91dd7d..ec029dfd6 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -127,6 +127,8 @@ static std::string ArgToIndex(const std::map& block_to_ind return fmt::format("#{}", arg.U32()); case Type::U64: return fmt::format("#{}", arg.U64()); + case Type::F32: + return fmt::format("#{}", arg.F32()); case Type::Reg: return fmt::format("{}", arg.Reg()); case Type::Pred: diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp index a48b8dec5..8709a2ea1 100644 --- a/src/shader_recompiler/frontend/ir/post_order.cpp +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -28,7 +28,7 @@ BlockList PostOrder(const BlockList& blocks) { if (!visited.insert(branch).second) { return false; } - // Calling push_back twice is faster than insert on msvc + // Calling push_back twice is faster than insert on MSVC block_stack.push_back(block); block_stack.push_back(branch); return true; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8331d576c..8c44ebb29 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,7 +69,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool(value)); } +IR::U32 TranslatorVisitor::GetReg8(u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + IR::U32 TranslatorVisitor::GetReg20(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index b701605d7..8bd468244 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -301,6 +301,7 @@ public: void X(IR::Reg dest_reg, const IR::U32& value); void F(IR::Reg dest_reg, const IR::F32& value); + [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); [[nodiscard]] IR::F32 GetReg20F(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 1f83d1068..c3c4b9abd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -10,36 +10,35 @@ namespace Shader::Maxwell { namespace { -union MOV { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<20, 8, IR::Reg> src_reg; - BitField<39, 4, u64> mask; -}; - -void CheckMask(MOV mov) { - if (mov.mask != 0xf) { +void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 4, u64> mask; + BitField<12, 4, u64> mov32i_mask; + } const mov{insn}; + + if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { throw NotImplementedException("Non-full move mask"); } + v.X(mov.dest_reg, src); } } // Anonymous namespace void TranslatorVisitor::MOV_reg(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, X(mov.src_reg)); + MOV(*this, insn, GetReg8(insn)); } void TranslatorVisitor::MOV_cbuf(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetCbuf(insn)); + MOV(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::MOV_imm(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetImm20(insn)); + MOV(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::MOV32I(u64 insn) { + MOV(*this, insn, GetImm32(insn), true); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 1bb160acb..6b2a1356b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -617,10 +617,6 @@ void TranslatorVisitor::MEMBAR(u64) { ThrowNotImplemented(Opcode::MEMBAR); } -void TranslatorVisitor::MOV32I(u64) { - ThrowNotImplemented(Opcode::MOV32I); -} - void TranslatorVisitor::NOP(u64) { ThrowNotImplemented(Opcode::NOP); } diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 1610bb34e..050a37f18 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -76,5 +76,5 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - Backend::SPIRV::EmitSPIRV spirv{program}; + void(Backend::SPIRV::EmitSPIRV(env, program)); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h new file mode 100644 index 000000000..c96d783b7 --- /dev/null +++ b/src/shader_recompiler/profile.h @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +struct Profile { + bool unified_descriptor_binding; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp new file mode 100644 index 000000000..b25081e39 --- /dev/null +++ b/src/shader_recompiler/recompiler.cpp @@ -0,0 +1,27 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/common_types.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/recompiler.h" + +namespace Shader { + +std::pair> RecompileSPIRV(Environment& env, u32 start_address) { + ObjectPool flow_block_pool; + ObjectPool inst_pool; + ObjectPool block_pool; + + Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; +} + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h new file mode 100644 index 000000000..4cb973878 --- /dev/null +++ b/src/shader_recompiler/recompiler.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader { + +[[nodiscard]] std::pair> RecompileSPIRV(Environment& env, u32 start_address); + +} // namespace Shader diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c5ce71706..3323e6916 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -43,9 +43,6 @@ add_library(video_core STATIC engines/maxwell_3d.h engines/maxwell_dma.cpp engines/maxwell_dma.h - engines/shader_bytecode.h - engines/shader_header.h - engines/shader_type.h framebuffer_config.h macro/macro.cpp macro/macro.h @@ -123,6 +120,7 @@ add_library(video_core STATIC renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_query_cache.cpp renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp @@ -201,7 +199,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad xbyak) +target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 0d7683c2d..f8b8d06ac 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -12,7 +12,6 @@ #include "common/common_types.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h deleted file mode 100644 index 8b45f1b62..000000000 --- a/src/video_core/engines/shader_bytecode.h +++ /dev/null @@ -1,2298 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -struct Register { - /// Number of registers - static constexpr std::size_t NumRegisters = 256; - - /// Register 255 is special cased to always be 0 - static constexpr std::size_t ZeroIndex = 255; - - enum class Size : u64 { - Byte = 0, - Short = 1, - Word = 2, - Long = 3, - }; - - constexpr Register() = default; - - constexpr Register(u64 value_) : value(value_) {} - - [[nodiscard]] constexpr operator u64() const { - return value; - } - - template - [[nodiscard]] constexpr u64 operator-(const T& oth) const { - return value - oth; - } - - template - [[nodiscard]] constexpr u64 operator&(const T& oth) const { - return value & oth; - } - - [[nodiscard]] constexpr u64 operator&(const Register& oth) const { - return value & oth.value; - } - - [[nodiscard]] constexpr u64 operator~() const { - return ~value; - } - - [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { - elem = (value + elem) & 3; - return (value & ~3) + elem; - } - -private: - u64 value{}; -}; - -enum class AttributeSize : u64 { - Word = 0, - DoubleWord = 1, - TripleWord = 2, - QuadWord = 3, -}; - -union Attribute { - Attribute() = default; - - constexpr explicit Attribute(u64 value_) : value(value_) {} - - enum class Index : u64 { - LayerViewportPointSize = 6, - Position = 7, - Attribute_0 = 8, - Attribute_31 = 39, - FrontColor = 40, - FrontSecondaryColor = 41, - BackColor = 42, - BackSecondaryColor = 43, - ClipDistances0123 = 44, - ClipDistances4567 = 45, - PointCoord = 46, - // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex - // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval - // shader. - TessCoordInstanceIDVertexID = 47, - TexCoord_0 = 48, - TexCoord_7 = 55, - // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment - // shader. It is unknown what the other values contain. - FrontFacing = 63, - }; - - union { - BitField<20, 10, u64> immediate; - BitField<22, 2, u64> element; - BitField<24, 6, Index> index; - BitField<31, 1, u64> patch; - BitField<47, 3, AttributeSize> size; - - [[nodiscard]] bool IsPhysical() const { - return patch == 0 && element == 0 && static_cast(index.Value()) == 0; - } - } fmt20; - - union { - BitField<30, 2, u64> element; - BitField<32, 6, Index> index; - } fmt28; - - BitField<39, 8, u64> reg; - u64 value{}; -}; - -union Sampler { - Sampler() = default; - - constexpr explicit Sampler(u64 value_) : value(value_) {} - - enum class Index : u64 { - Sampler_0 = 8, - }; - - BitField<36, 13, Index> index; - u64 value{}; -}; - -union Image { - Image() = default; - - constexpr explicit Image(u64 value_) : value{value_} {} - - BitField<36, 13, u64> index; - u64 value; -}; - -} // namespace Tegra::Shader - -namespace std { - -// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330. -template <> -struct make_unsigned { - using type = Tegra::Shader::Attribute; -}; - -template <> -struct make_unsigned { - using type = Tegra::Shader::Register; -}; - -} // namespace std - -namespace Tegra::Shader { - -enum class Pred : u64 { - UnusedIndex = 0x7, - NeverExecute = 0xF, -}; - -enum class PredCondition : u64 { - F = 0, // Always false - LT = 1, // Ordered less than - EQ = 2, // Ordered equal - LE = 3, // Ordered less than or equal - GT = 4, // Ordered greater than - NE = 5, // Ordered not equal - GE = 6, // Ordered greater than or equal - NUM = 7, // Ordered - NAN_ = 8, // Unordered - LTU = 9, // Unordered less than - EQU = 10, // Unordered equal - LEU = 11, // Unordered less than or equal - GTU = 12, // Unordered greater than - NEU = 13, // Unordered not equal - GEU = 14, // Unordered greater than or equal - T = 15, // Always true -}; - -enum class PredOperation : u64 { - And = 0, - Or = 1, - Xor = 2, -}; - -enum class LogicOperation : u64 { - And = 0, - Or = 1, - Xor = 2, - PassB = 3, -}; - -enum class SubOp : u64 { - Cos = 0x0, - Sin = 0x1, - Ex2 = 0x2, - Lg2 = 0x3, - Rcp = 0x4, - Rsq = 0x5, - Sqrt = 0x8, -}; - -enum class F2iRoundingOp : u64 { - RoundEven = 0, - Floor = 1, - Ceil = 2, - Trunc = 3, -}; - -enum class F2fRoundingOp : u64 { - None = 0, - Pass = 3, - Round = 8, - Floor = 9, - Ceil = 10, - Trunc = 11, -}; - -enum class AtomicOp : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, - SafeAdd = 10, -}; - -enum class GlobalAtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32_FTZ_RN = 3, - F16x2_FTZ_RN = 4, - S64 = 5, -}; - -enum class UniformType : u64 { - UnsignedByte = 0, - SignedByte = 1, - UnsignedShort = 2, - SignedShort = 3, - Single = 4, - Double = 5, - Quad = 6, - UnsignedQuad = 7, -}; - -enum class StoreType : u64 { - Unsigned8 = 0, - Signed8 = 1, - Unsigned16 = 2, - Signed16 = 3, - Bits32 = 4, - Bits64 = 5, - Bits128 = 6, -}; - -enum class AtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - S64 = 3, -}; - -enum class IMinMaxExchange : u64 { - None = 0, - XLo = 1, - XMed = 2, - XHi = 3, -}; - -enum class VideoType : u64 { - Size16_Low = 0, - Size16_High = 1, - Size32 = 2, - Invalid = 3, -}; - -enum class VmadShr : u64 { - Shr7 = 1, - Shr15 = 2, -}; - -enum class VmnmxType : u64 { - Bits8, - Bits16, - Bits32, -}; - -enum class VmnmxOperation : u64 { - Mrg_16H = 0, - Mrg_16L = 1, - Mrg_8B0 = 2, - Mrg_8B2 = 3, - Acc = 4, - Min = 5, - Max = 6, - Nop = 7, -}; - -enum class XmadMode : u64 { - None = 0, - CLo = 1, - CHi = 2, - CSfu = 3, - CBcc = 4, -}; - -enum class IAdd3Mode : u64 { - None = 0, - RightShift = 1, - LeftShift = 2, -}; - -enum class IAdd3Height : u64 { - None = 0, - LowerHalfWord = 1, - UpperHalfWord = 2, -}; - -enum class FlowCondition : u64 { - Always = 0xF, - Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? -}; - -enum class ConditionCode : u64 { - F = 0, - LT = 1, - EQ = 2, - LE = 3, - GT = 4, - NE = 5, - GE = 6, - Num = 7, - Nan = 8, - LTU = 9, - EQU = 10, - LEU = 11, - GTU = 12, - NEU = 13, - GEU = 14, - T = 15, - OFF = 16, - LO = 17, - SFF = 18, - LS = 19, - HI = 20, - SFT = 21, - HS = 22, - OFT = 23, - CSM_TA = 24, - CSM_TR = 25, - CSM_MX = 26, - FCSM_TA = 27, - FCSM_TR = 28, - FCSM_MX = 29, - RLE = 30, - RGT = 31, -}; - -enum class PredicateResultMode : u64 { - None = 0x0, - NotZero = 0x3, -}; - -enum class TextureType : u64 { - Texture1D = 0, - Texture2D = 1, - Texture3D = 2, - TextureCube = 3, -}; - -enum class TextureQueryType : u64 { - Dimension = 1, - TextureType = 2, - SamplePosition = 5, - Filter = 16, - LevelOfDetail = 18, - Wrap = 20, - BorderColor = 22, -}; - -enum class TextureProcessMode : u64 { - None = 0, - LZ = 1, // Load LOD of zero. - LB = 2, // Load Bias. - LL = 3, // Load LOD. - LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB. - LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL. -}; - -enum class TextureMiscMode : u64 { - DC, - AOFFI, // Uses Offset - NDV, - NODEP, - MZ, - PTP, -}; - -enum class SurfaceDataMode : u64 { - P = 0, - D_BA = 1, -}; - -enum class OutOfBoundsStore : u64 { - Ignore = 0, - Clamp = 1, - Trap = 2, -}; - -enum class ImageType : u64 { - Texture1D = 0, - TextureBuffer = 1, - Texture1DArray = 2, - Texture2D = 3, - Texture2DArray = 4, - Texture3D = 5, -}; - -enum class IsberdMode : u64 { - None = 0, - Patch = 1, - Prim = 2, - Attr = 3, -}; - -enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; - -enum class MembarType : u64 { - CTA = 0, - GL = 1, - SYS = 2, - VC = 3, -}; - -enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 }; - -enum class HalfType : u64 { - H0_H1 = 0, - F32 = 1, - H0_H0 = 2, - H1_H1 = 3, -}; - -enum class HalfMerge : u64 { - H0_H1 = 0, - F32 = 1, - Mrg_H0 = 2, - Mrg_H1 = 3, -}; - -enum class HalfPrecision : u64 { - None = 0, - FTZ = 1, - FMZ = 2, -}; - -enum class R2pMode : u64 { - Pr = 0, - Cc = 1, -}; - -enum class IpaInterpMode : u64 { - Pass = 0, - Multiply = 1, - Constant = 2, - Sc = 3, -}; - -enum class IpaSampleMode : u64 { - Default = 0, - Centroid = 1, - Offset = 2, -}; - -enum class LmemLoadCacheManagement : u64 { - Default = 0, - LU = 1, - CI = 2, - CV = 3, -}; - -enum class StoreCacheManagement : u64 { - Default = 0, - CG = 1, - CS = 2, - WT = 3, -}; - -struct IpaMode { - IpaInterpMode interpolation_mode; - IpaSampleMode sampling_mode; - - [[nodiscard]] bool operator==(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) == - std::tie(a.interpolation_mode, a.sampling_mode); - } - [[nodiscard]] bool operator!=(const IpaMode& a) const { - return !operator==(a); - } - [[nodiscard]] bool operator<(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) < - std::tie(a.interpolation_mode, a.sampling_mode); - } -}; - -enum class SystemVariable : u64 { - LaneId = 0x00, - VirtCfg = 0x02, - VirtId = 0x03, - Pm0 = 0x04, - Pm1 = 0x05, - Pm2 = 0x06, - Pm3 = 0x07, - Pm4 = 0x08, - Pm5 = 0x09, - Pm6 = 0x0a, - Pm7 = 0x0b, - OrderingTicket = 0x0f, - PrimType = 0x10, - InvocationId = 0x11, - Ydirection = 0x12, - ThreadKill = 0x13, - ShaderType = 0x14, - DirectBeWriteAddressLow = 0x15, - DirectBeWriteAddressHigh = 0x16, - DirectBeWriteEnabled = 0x17, - MachineId0 = 0x18, - MachineId1 = 0x19, - MachineId2 = 0x1a, - MachineId3 = 0x1b, - Affinity = 0x1c, - InvocationInfo = 0x1d, - WscaleFactorXY = 0x1e, - WscaleFactorZ = 0x1f, - Tid = 0x20, - TidX = 0x21, - TidY = 0x22, - TidZ = 0x23, - CtaParam = 0x24, - CtaIdX = 0x25, - CtaIdY = 0x26, - CtaIdZ = 0x27, - NtId = 0x28, - CirQueueIncrMinusOne = 0x29, - Nlatc = 0x2a, - SmSpaVersion = 0x2c, - MultiPassShaderInfo = 0x2d, - LwinHi = 0x2e, - SwinHi = 0x2f, - SwinLo = 0x30, - SwinSz = 0x31, - SmemSz = 0x32, - SmemBanks = 0x33, - LwinLo = 0x34, - LwinSz = 0x35, - LmemLosz = 0x36, - LmemHioff = 0x37, - EqMask = 0x38, - LtMask = 0x39, - LeMask = 0x3a, - GtMask = 0x3b, - GeMask = 0x3c, - RegAlloc = 0x3d, - CtxAddr = 0x3e, // .fmask = F_SM50 - BarrierAlloc = 0x3e, // .fmask = F_SM60 - GlobalErrorStatus = 0x40, - WarpErrorStatus = 0x42, - WarpErrorStatusClear = 0x43, - PmHi0 = 0x48, - PmHi1 = 0x49, - PmHi2 = 0x4a, - PmHi3 = 0x4b, - PmHi4 = 0x4c, - PmHi5 = 0x4d, - PmHi6 = 0x4e, - PmHi7 = 0x4f, - ClockLo = 0x50, - ClockHi = 0x51, - GlobalTimerLo = 0x52, - GlobalTimerHi = 0x53, - HwTaskId = 0x60, - CircularQueueEntryIndex = 0x61, - CircularQueueEntryAddressLow = 0x62, - CircularQueueEntryAddressHigh = 0x63, -}; - -enum class PhysicalAttributeDirection : u64 { - Input = 0, - Output = 1, -}; - -enum class VoteOperation : u64 { - All = 0, // allThreadsNV - Any = 1, // anyThreadNV - Eq = 2, // allThreadsEqualNV -}; - -enum class ImageAtomicOperationType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32 = 3, - S64 = 5, - SD32 = 6, - SD64 = 7, -}; - -enum class ImageAtomicOperation : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, -}; - -enum class ShuffleOperation : u64 { - Idx = 0, // shuffleNV - Up = 1, // shuffleUpNV - Down = 2, // shuffleDownNV - Bfly = 3, // shuffleXorNV -}; - -enum class ShfType : u64 { - Bits32 = 0, - U64 = 2, - S64 = 3, -}; - -enum class ShfXmode : u64 { - None = 0, - HI = 1, - X = 2, - XHI = 3, -}; - -union Instruction { - constexpr Instruction& operator=(const Instruction& instr) { - value = instr.value; - return *this; - } - - constexpr Instruction(u64 value_) : value{value_} {} - constexpr Instruction(const Instruction& instr) : value(instr.value) {} - - [[nodiscard]] constexpr bool Bit(u64 offset) const { - return ((value >> offset) & 1) != 0; - } - - BitField<0, 8, Register> gpr0; - BitField<8, 8, Register> gpr8; - union { - BitField<16, 4, Pred> full_pred; - BitField<16, 3, u64> pred_index; - } pred; - BitField<19, 1, u64> negate_pred; - BitField<20, 8, Register> gpr20; - BitField<20, 4, SubOp> sub_op; - BitField<28, 8, Register> gpr28; - BitField<39, 8, Register> gpr39; - BitField<48, 16, u64> opcode; - - union { - BitField<8, 5, ConditionCode> cc; - BitField<13, 1, u64> trigger; - } nop; - - union { - BitField<48, 2, VoteOperation> operation; - BitField<45, 3, u64> dest_pred; - BitField<39, 3, u64> value; - BitField<42, 1, u64> negate_value; - } vote; - - union { - BitField<30, 2, ShuffleOperation> operation; - BitField<48, 3, u64> pred48; - BitField<28, 1, u64> is_index_imm; - BitField<29, 1, u64> is_mask_imm; - BitField<20, 5, u64> index_imm; - BitField<34, 13, u64> mask_imm; - } shfl; - - union { - BitField<44, 1, u64> ftz; - BitField<39, 2, u64> tab5cb8_2; - BitField<38, 1, u64> ndv; - BitField<47, 1, u64> cc; - BitField<28, 8, u64> swizzle; - } fswzadd; - - union { - BitField<8, 8, Register> gpr; - BitField<20, 24, s64> offset; - } gmem; - - union { - BitField<20, 16, u64> imm20_16; - BitField<20, 19, u64> imm20_19; - BitField<20, 32, s64> imm20_32; - BitField<45, 1, u64> negate_b; - BitField<46, 1, u64> abs_a; - BitField<48, 1, u64> negate_a; - BitField<49, 1, u64> abs_b; - BitField<50, 1, u64> saturate_d; - BitField<56, 1, u64> negate_imm; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - } fmnmx; - - union { - BitField<39, 1, u64> invert_a; - BitField<40, 1, u64> invert_b; - BitField<41, 2, LogicOperation> operation; - BitField<44, 2, PredicateResultMode> pred_result_mode; - BitField<48, 3, Pred> pred48; - } lop; - - union { - BitField<53, 2, LogicOperation> operation; - BitField<55, 1, u64> invert_a; - BitField<56, 1, u64> invert_b; - } lop32i; - - union { - BitField<28, 8, u64> imm_lut28; - BitField<48, 8, u64> imm_lut48; - - [[nodiscard]] u32 GetImmLut28() const { - return static_cast(imm_lut28); - } - - [[nodiscard]] u32 GetImmLut48() const { - return static_cast(imm_lut48); - } - } lop3; - - [[nodiscard]] u16 GetImm20_16() const { - return static_cast(imm20_16); - } - - [[nodiscard]] u32 GetImm20_19() const { - u32 imm{static_cast(imm20_19)}; - imm <<= 12; - imm |= negate_imm ? 0x80000000 : 0; - return imm; - } - - [[nodiscard]] u32 GetImm20_32() const { - return static_cast(imm20_32); - } - - [[nodiscard]] s32 GetSignedImm20_20() const { - const auto immediate = static_cast(imm20_19 | (negate_imm << 19)); - // Sign extend the 20-bit value. - const auto mask = 1U << (20 - 1); - return static_cast((immediate ^ mask) - mask); - } - } alu; - - union { - BitField<38, 1, u64> idx; - BitField<51, 1, u64> saturate; - BitField<52, 2, IpaSampleMode> sample_mode; - BitField<54, 2, IpaInterpMode> interp_mode; - } ipa; - - union { - BitField<39, 2, u64> tab5cb8_2; - BitField<41, 3, u64> postfactor; - BitField<44, 2, u64> tab5c68_0; - BitField<48, 1, u64> negate_b; - } fmul; - - union { - BitField<55, 1, u64> saturate; - } fmul32; - - union { - BitField<52, 1, u64> generates_cc; - } op_32; - - union { - BitField<48, 1, u64> is_signed; - } shift; - - union { - BitField<39, 1, u64> wrap; - } shr; - - union { - BitField<37, 2, ShfType> type; - BitField<48, 2, ShfXmode> xmode; - BitField<50, 1, u64> wrap; - BitField<20, 6, u64> immediate; - } shf; - - union { - BitField<39, 5, u64> shift_amount; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_a; - } alu_integer; - - union { - BitField<43, 1, u64> x; - } iadd; - - union { - BitField<39, 1, u64> ftz; - BitField<32, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - - BitField<35, 2, HalfType> type_c; - } alu_half; - - union { - BitField<39, 2, HalfPrecision> precision; - BitField<39, 1, u64> ftz; - BitField<52, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - } alu_half_imm; - - union { - BitField<29, 1, u64> first_negate; - BitField<20, 9, u64> first; - - BitField<56, 1, u64> second_negate; - BitField<30, 9, u64> second; - - [[nodiscard]] u32 PackImmediates() const { - // Immediates are half floats shifted. - constexpr u32 imm_shift = 6; - return static_cast((first << imm_shift) | (second << (16 + imm_shift))); - } - } half_imm; - - union { - union { - BitField<37, 2, HalfPrecision> precision; - BitField<32, 1, u64> saturate; - - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> negate_c; - BitField<35, 2, HalfType> type_c; - } rr; - - BitField<57, 2, HalfPrecision> precision; - BitField<52, 1, u64> saturate; - - BitField<49, 2, HalfMerge> merge; - - BitField<47, 2, HalfType> type_a; - - BitField<56, 1, u64> negate_b; - BitField<28, 2, HalfType> type_b; - - BitField<51, 1, u64> negate_c; - BitField<53, 2, HalfType> type_reg39; - } hfma2; - - union { - BitField<40, 1, u64> invert; - } popc; - - union { - BitField<41, 1, u64> sh; - BitField<40, 1, u64> invert; - BitField<48, 1, u64> is_signed; - } flo; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> neg_pred; - } sel; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - BitField<43, 2, IMinMaxExchange> exchange; - BitField<48, 1, u64> is_signed; - } imnmx; - - union { - BitField<31, 2, IAdd3Height> height_c; - BitField<33, 2, IAdd3Height> height_b; - BitField<35, 2, IAdd3Height> height_a; - BitField<37, 2, IAdd3Mode> mode; - BitField<49, 1, u64> neg_c; - BitField<50, 1, u64> neg_b; - BitField<51, 1, u64> neg_a; - } iadd3; - - union { - BitField<54, 1, u64> saturate; - BitField<56, 1, u64> negate_a; - } iadd32i; - - union { - BitField<53, 1, u64> negate_b; - BitField<54, 1, u64> abs_a; - BitField<56, 1, u64> negate_a; - BitField<57, 1, u64> abs_b; - } fadd32i; - - union { - BitField<40, 1, u64> brev; - BitField<47, 1, u64> rd_cc; - BitField<48, 1, u64> is_signed; - } bfe; - - union { - BitField<48, 3, u64> pred48; - - union { - BitField<20, 20, u64> entry_a; - BitField<39, 5, u64> entry_b; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } imm; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<56, 1, u64> neg; - BitField<57, 1, u64> uses_cc; - } hi; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } rz; - - union { - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } r1; - - union { - BitField<28, 8, u64> entry_a; - BitField<37, 1, u64> neg; - BitField<38, 1, u64> uses_cc; - } r2; - - } lea; - - union { - BitField<0, 5, FlowCondition> cond; - } flow; - - union { - BitField<47, 1, u64> cc; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_c; - BitField<51, 2, u64> tab5980_1; - BitField<53, 2, u64> tab5980_0; - } ffma; - - union { - BitField<48, 3, UniformType> type; - BitField<44, 2, u64> unknown; - } ld_c; - - union { - BitField<48, 3, StoreType> type; - } ldst_sl; - - union { - BitField<44, 2, u64> unknown; - } ld_l; - - union { - BitField<44, 2, StoreCacheManagement> cache_management; - } st_l; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } ldg; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } stg; - - union { - BitField<23, 3, AtomicOp> operation; - BitField<48, 1, u64> extended; - BitField<20, 3, GlobalAtomicType> type; - } red; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<49, 3, GlobalAtomicType> type; - BitField<28, 20, s64> offset; - } atom; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<28, 2, AtomicType> type; - BitField<30, 22, s64> offset; - - [[nodiscard]] s32 GetImmediateOffset() const { - return static_cast(offset << 2); - } - } atoms; - - union { - BitField<32, 1, PhysicalAttributeDirection> direction; - BitField<47, 3, AttributeSize> size; - BitField<20, 11, u64> address; - } al2p; - - union { - BitField<53, 3, UniformType> type; - BitField<52, 1, u64> extended; - } generic; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<6, 1, u64> neg_b; - BitField<7, 1, u64> abs_a; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fsetp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } isetp; - - union { - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } icmp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 2, PredOperation> op; - } psetp; - - union { - BitField<43, 4, PredCondition> cond; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<39, 3, u64> pred39; - } vsetp; - - union { - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - } pset; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<8, 5, ConditionCode> cc; // flag in cc - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 4, PredOperation> op; // op with pred39 - } csetp; - - union { - BitField<6, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - union { - BitField<35, 4, PredCondition> cond; - BitField<49, 1, u64> h_and; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - } reg; - union { - BitField<56, 1, u64> negate_b; - BitField<54, 1, u64> abs_b; - } cbuf; - union { - BitField<49, 4, PredCondition> cond; - BitField<53, 1, u64> h_and; - } cbuf_and_imm; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hsetp2; - - union { - BitField<40, 1, R2pMode> mode; - BitField<41, 2, u64> byte; - BitField<20, 7, u64> immediate_mask; - } p2r_r2p; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<48, 4, PredCondition> cond; - BitField<52, 1, u64> bf; - BitField<53, 1, u64> neg_b; - BitField<54, 1, u64> abs_a; - BitField<55, 1, u64> ftz; - } fset; - - union { - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fcmp; - - union { - BitField<49, 1, u64> bf; - BitField<35, 3, PredCondition> cond; - BitField<50, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hset2; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } iset; - - union { - BitField<45, 1, u64> negate_a; - BitField<49, 1, u64> abs_a; - BitField<10, 2, Register::Size> src_size; - BitField<13, 1, u64> is_input_signed; - BitField<8, 2, Register::Size> dst_size; - BitField<12, 1, u64> is_output_signed; - - union { - BitField<39, 2, u64> tab5cb8_2; - } i2f; - - union { - BitField<39, 2, F2iRoundingOp> rounding; - } f2i; - - union { - BitField<39, 4, u64> rounding; - // H0, H1 extract for F16 missing - BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value - [[nodiscard]] F2fRoundingOp GetRoundingMode() const { - constexpr u64 rounding_mask = 0x0B; - return static_cast(rounding.Value() & rounding_mask); - } - } f2f; - - union { - BitField<41, 2, u64> selector; - } int_src; - - union { - BitField<41, 1, u64> selector; - } float_src; - } conversion; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 1, u64> aoffi_flag; - BitField<55, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<36, 1, u64> aoffi_flag; - BitField<37, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex_b; - - union { - BitField<22, 6, TextureQueryType> query_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - } txq; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return (ndv_flag != 0); - case TextureMiscMode::NODEP: - return (nodep_flag != 0); - default: - break; - } - return false; - } - } tmml; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 2, u64> offset_mode; - BitField<56, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4; - - union { - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<33, 2, u64> offset_mode; - BitField<37, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4_b; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<51, 1, u64> aoffi_flag; - BitField<52, 2, u64> component; - BitField<55, 1, u64> fp16_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tld4s; - - union { - BitField<0, 8, Register> gpr0; - BitField<28, 8, Register> gpr28; - BitField<49, 1, u64> nodep_flag; - BitField<50, 3, u64> component_mask_selector; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TEXS instruction has a weird encoding for the texture type. - if (texture_info == 0) { - return TextureType::Texture1D; - } - if (texture_info >= 1 && texture_info <= 9) { - return TextureType::Texture2D; - } - if (texture_info >= 10 && texture_info <= 11) { - return TextureType::Texture3D; - } - if (texture_info >= 12 && texture_info <= 13) { - return TextureType::TextureCube; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - switch (texture_info) { - case 0: - case 2: - case 6: - case 8: - case 9: - case 11: - return TextureProcessMode::LZ; - case 3: - case 5: - case 13: - return TextureProcessMode::LL; - default: - break; - } - return TextureProcessMode::None; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info >= 7 && texture_info <= 9; - } - - [[nodiscard]] bool HasTwoDestinations() const { - return gpr28.Value() != Register::ZeroIndex; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - static constexpr std::array, 4> mask_lut{{ - {}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x7, 0xb, 0xd, 0xe, 0xf}, - }}; - - std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; - index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; - - u32 mask = mask_lut[index][component_mask_selector]; - // A mask of 0 means this instruction uses an unimplemented mask. - ASSERT(mask != 0); - return ((1ull << component) & mask) != 0; - } - } texs; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> ms; // Multisample? - BitField<54, 1, u64> cl; - BitField<55, 1, u64> process_mode; - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; - } - } tld; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TLDS instruction has a weird encoding for the texture type. - if (texture_info <= 1) { - return TextureType::Texture1D; - } - if (texture_info == 2 || texture_info == 8 || texture_info == 12 || - (texture_info >= 4 && texture_info <= 6)) { - return TextureType::Texture2D; - } - if (texture_info == 7) { - return TextureType::Texture3D; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - if (texture_info == 1 || texture_info == 5 || texture_info == 12) { - return TextureProcessMode::LL; - } - return TextureProcessMode::LZ; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return texture_info == 12 || texture_info == 4; - case TextureMiscMode::MZ: - return texture_info == 5; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info == 8; - } - } tlds; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - } txd; - - union { - BitField<24, 2, StoreCacheManagement> cache_management; - BitField<33, 3, ImageType> image_type; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - BitField<51, 1, u64> is_immediate; - BitField<52, 1, SurfaceDataMode> mode; - - BitField<20, 3, StoreType> store_data_layout; - BitField<20, 4, u64> component_mask_selector; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - ASSERT(mode == SurfaceDataMode::P); - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array mask = { - 0, (R), (G), (R | G), (B), (R | B), - (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), - (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask_selector)}.test(component); - } - - [[nodiscard]] StoreType GetStoreDataLayout() const { - ASSERT(mode == SurfaceDataMode::D_BA); - return store_data_layout; - } - } suldst; - - union { - BitField<28, 1, u64> is_ba; - BitField<51, 3, ImageAtomicOperationType> operation_type; - BitField<33, 3, ImageType> image_type; - BitField<29, 4, ImageAtomicOperation> operation; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - } suatom_d; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchTarget() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast(target); - constexpr auto instruction_size = static_cast(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast((target_value ^ mask) - mask) / instruction_size + 1; - } - } bra; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchExtend() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast(target); - constexpr auto instruction_size = static_cast(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast((target_value ^ mask) - mask) / instruction_size + 1; - } - } brx; - - union { - BitField<39, 1, u64> emit; // EmitVertex - BitField<40, 1, u64> cut; // EndPrimitive - } out; - - union { - BitField<31, 1, u64> skew; - BitField<32, 1, u64> o; - BitField<33, 2, IsberdMode> mode; - BitField<47, 2, IsberdShift> shift; - } isberd; - - union { - BitField<8, 2, MembarType> type; - BitField<0, 2, MembarUnknown> unknown; - } membar; - - union { - BitField<48, 1, u64> signed_a; - BitField<38, 1, u64> is_byte_chunk_a; - BitField<36, 2, VideoType> type_a; - BitField<36, 2, u64> byte_height_a; - - BitField<49, 1, u64> signed_b; - BitField<50, 1, u64> use_register_b; - BitField<30, 1, u64> is_byte_chunk_b; - BitField<28, 2, VideoType> type_b; - BitField<28, 2, u64> byte_height_b; - } video; - - union { - BitField<51, 2, VmadShr> shr; - BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) - BitField<47, 1, u64> cc; - } vmad; - - union { - BitField<54, 1, u64> is_dest_signed; - BitField<48, 1, u64> is_src_a_signed; - BitField<49, 1, u64> is_src_b_signed; - BitField<37, 2, u64> src_format_a; - BitField<29, 2, u64> src_format_b; - BitField<56, 1, u64> mx; - BitField<55, 1, u64> sat; - BitField<36, 2, u64> selector_a; - BitField<28, 2, u64> selector_b; - BitField<50, 1, u64> is_op_b_register; - BitField<51, 3, VmnmxOperation> operation; - - [[nodiscard]] VmnmxType SourceFormatA() const { - switch (src_format_a) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - - [[nodiscard]] VmnmxType SourceFormatB() const { - switch (src_format_b) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - } vmnmx; - - union { - BitField<20, 16, u64> imm20_16; - BitField<35, 1, u64> high_b_rr; // used on RR - BitField<36, 1, u64> product_shift_left; - BitField<37, 1, u64> merge_37; - BitField<48, 1, u64> sign_a; - BitField<49, 1, u64> sign_b; - BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC - BitField<50, 3, XmadMode> mode; - BitField<52, 1, u64> high_b; - BitField<53, 1, u64> high_a; - BitField<55, 1, u64> product_shift_left_second; // used on CR - BitField<56, 1, u64> merge_56; - } xmad; - - union { - BitField<20, 14, u64> shifted_offset; - BitField<34, 5, u64> index; - - [[nodiscard]] u64 GetOffset() const { - return shifted_offset * 4; - } - } cbuf34; - - union { - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - - [[nodiscard]] s64 GetOffset() const { - return offset; - } - } cbuf36; - - // Unsure about the size of this one. - // It's always used with a gpr0, so any size should be fine. - BitField<20, 8, SystemVariable> sys20; - - BitField<47, 1, u64> generates_cc; - BitField<61, 1, u64> is_b_imm; - BitField<60, 1, u64> is_b_gpr; - BitField<59, 1, u64> is_c_gpr; - BitField<20, 24, s64> smem_imm; - BitField<0, 5, ConditionCode> flow_condition_code; - - Attribute attribute; - Sampler sampler; - Image image; - - u64 value; -}; -static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); -static_assert(std::is_standard_layout_v, "Instruction is not standard layout"); - -class OpCode { -public: - enum class Id { - KIL, - SSY, - SYNC, - BRK, - DEPBAR, - VOTE, - VOTE_VTG, - SHFL, - FSWZADD, - BFE_C, - BFE_R, - BFE_IMM, - BFI_RC, - BFI_IMM_R, - BRA, - BRX, - PBK, - LD_A, - LD_L, - LD_S, - LD_C, - LD, // Load from generic memory - LDG, // Load from global memory - ST_A, - ST_L, - ST_S, - ST, // Store in generic memory - STG, // Store in global memory - RED, // Reduction operation - ATOM, // Atomic operation on global memory - ATOMS, // Atomic operation on shared memory - AL2P, // Transforms attribute memory into physical memory - TEX, - TEX_B, // Texture Load Bindless - TXQ, // Texture Query - TXQ_B, // Texture Query Bindless - TEXS, // Texture Fetch with scalar/non-vec4 source/destinations - TLD, // Texture Load - TLDS, // Texture Load with scalar/non-vec4 source/destinations - TLD4, // Texture Gather 4 - TLD4_B, // Texture Gather 4 Bindless - TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations - TMML_B, // Texture Mip Map Level - TMML, // Texture Mip Map Level - TXD, // Texture Gradient/Load with Derivates - TXD_B, // Texture Gradient/Load with Derivates Bindless - SUST, // Surface Store - SULD, // Surface Load - SUATOM, // Surface Atomic Operation - EXIT, - NOP, - IPA, - OUT_R, // Emit vertex/primitive - ISBERD, - BAR, - MEMBAR, - VMAD, - VSETP, - VMNMX, - FFMA_IMM, // Fused Multiply and Add - FFMA_CR, - FFMA_RC, - FFMA_RR, - FADD_C, - FADD_R, - FADD_IMM, - FADD32I, - FMUL_C, - FMUL_R, - FMUL_IMM, - FMUL32_IMM, - IADD_C, - IADD_R, - IADD_IMM, - IADD3_C, // Add 3 Integers - IADD3_R, - IADD3_IMM, - IADD32I, - ISCADD_C, // Scale and Add - ISCADD_R, - ISCADD_IMM, - FLO_R, - FLO_C, - FLO_IMM, - LEA_R1, - LEA_R2, - LEA_RZ, - LEA_IMM, - LEA_HI, - HADD2_C, - HADD2_R, - HADD2_IMM, - HMUL2_C, - HMUL2_R, - HMUL2_IMM, - HFMA2_CR, - HFMA2_RC, - HFMA2_RR, - HFMA2_IMM_R, - HSETP2_C, - HSETP2_R, - HSETP2_IMM, - HSET2_C, - HSET2_R, - HSET2_IMM, - POPC_C, - POPC_R, - POPC_IMM, - SEL_C, - SEL_R, - SEL_IMM, - ICMP_RC, - ICMP_R, - ICMP_CR, - ICMP_IMM, - FCMP_RR, - FCMP_RC, - FCMP_IMMR, - MUFU, // Multi-Function Operator - RRO_C, // Range Reduction Operator - RRO_R, - RRO_IMM, - F2F_C, - F2F_R, - F2F_IMM, - F2I_C, - F2I_R, - F2I_IMM, - I2F_C, - I2F_R, - I2F_IMM, - I2I_C, - I2I_R, - I2I_IMM, - LOP_C, - LOP_R, - LOP_IMM, - LOP32I, - LOP3_C, - LOP3_R, - LOP3_IMM, - MOV_C, - MOV_R, - MOV_IMM, - S2R, - MOV32_IMM, - SHL_C, - SHL_R, - SHL_IMM, - SHR_C, - SHR_R, - SHR_IMM, - SHF_RIGHT_R, - SHF_RIGHT_IMM, - SHF_LEFT_R, - SHF_LEFT_IMM, - FMNMX_C, - FMNMX_R, - FMNMX_IMM, - IMNMX_C, - IMNMX_R, - IMNMX_IMM, - FSETP_C, // Set Predicate - FSETP_R, - FSETP_IMM, - FSET_C, - FSET_R, - FSET_IMM, - ISETP_C, - ISETP_IMM, - ISETP_R, - ISET_R, - ISET_C, - ISET_IMM, - PSETP, - PSET, - CSETP, - R2P_IMM, - P2R_IMM, - XMAD_IMM, - XMAD_CR, - XMAD_RC, - XMAD_RR, - }; - - enum class Type { - Trivial, - Arithmetic, - ArithmeticImmediate, - ArithmeticInteger, - ArithmeticIntegerImmediate, - ArithmeticHalf, - ArithmeticHalfImmediate, - Bfe, - Bfi, - Shift, - Ffma, - Hfma2, - Flow, - Synch, - Warp, - Memory, - Texture, - Image, - FloatSet, - FloatSetPredicate, - IntegerSet, - IntegerSetPredicate, - HalfSet, - HalfSetPredicate, - PredicateSetPredicate, - PredicateSetRegister, - RegisterSetPredicate, - Conversion, - Video, - Xmad, - Unknown, - }; - - /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be - /// conditionally executed). - [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) { - // TODO(Subv): Add the rest of unpredicated instructions. - return opcode != Id::SSY && opcode != Id::PBK; - } - - class Matcher { - public: - constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) - : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} - - [[nodiscard]] constexpr const char* GetName() const { - return name; - } - - [[nodiscard]] constexpr u16 GetMask() const { - return mask; - } - - [[nodiscard]] constexpr Id GetId() const { - return id; - } - - [[nodiscard]] constexpr Type GetType() const { - return type; - } - - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - [[nodiscard]] constexpr bool Matches(u16 instruction) const { - return (instruction & mask) == expected; - } - - private: - const char* name; - u16 mask; - u16 expected; - Id id; - Type type; - }; - - using DecodeResult = std::optional>; - [[nodiscard]] static DecodeResult Decode(Instruction instr) { - static const auto table{GetDecodeTable()}; - - const auto matches_instruction = [instr](const auto& matcher) { - return matcher.Matches(static_cast(instr.opcode)); - }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); - return iter != table.end() ? std::optional>(*iter) - : std::nullopt; - } - -private: - struct Detail { - private: - static constexpr std::size_t opcode_bitsize = 16; - - /** - * Generates the mask and the expected value after masking from a given bitstring. - * A '0' in a bitstring indicates that a zero must be present at that bit position. - * A '1' in a bitstring indicates that a one must be present at that bit position. - */ - [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) { - u16 mask = 0, expect = 0; - for (std::size_t i = 0; i < opcode_bitsize; i++) { - const std::size_t bit_position = opcode_bitsize - i - 1; - switch (bitstring[i]) { - case '0': - mask |= static_cast(1U << bit_position); - break; - case '1': - expect |= static_cast(1U << bit_position); - mask |= static_cast(1U << bit_position); - break; - default: - // Ignore - break; - } - } - return std::make_pair(mask, expect); - } - - public: - /// Creates a matcher that can match and parse instructions based on bitstring. - [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op, - Type type, const char* const name) { - const auto [mask, expected] = GetMaskAndExpect(bitstring); - return Matcher(name, mask, expected, op, type); - } - }; - - [[nodiscard]] static std::vector GetDecodeTable() { - std::vector table = { -#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) - INST("111000110011----", Id::KIL, Type::Flow, "KIL"), - INST("111000101001----", Id::SSY, Type::Flow, "SSY"), - INST("111000101010----", Id::PBK, Type::Flow, "PBK"), - INST("111000100100----", Id::BRA, Type::Flow, "BRA"), - INST("111000100101----", Id::BRX, Type::Flow, "BRX"), - INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), - INST("111000110100----", Id::BRK, Type::Flow, "BRK"), - INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), - INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), - INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), - INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), - INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), - INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), - INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), - INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), - INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), - INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), - INST("100-------------", Id::LD, Type::Memory, "LD"), - INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), - INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), - INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), - INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), - INST("101-------------", Id::ST, Type::Memory, "ST"), - INST("1110111011011---", Id::STG, Type::Memory, "STG"), - INST("1110101111111---", Id::RED, Type::Memory, "RED"), - INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), - INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), - INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), - INST("110000----111---", Id::TEX, Type::Texture, "TEX"), - INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), - INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), - INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), - INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), - INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), - INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), - INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), - INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), - INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"), - INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), - INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), - INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), - INST("11011110001110--", Id::TXD, Type::Texture, "TXD"), - INST("11101011001-----", Id::SUST, Type::Image, "SUST"), - INST("11101011000-----", Id::SULD, Type::Image, "SULD"), - INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), - INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), - INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), - INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), - INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), - INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"), - INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), - INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), - INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), - INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), - INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), - INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), - INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), - INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"), - INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"), - INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"), - INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"), - INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"), - INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), - INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), - INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), - INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"), - INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"), - INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"), - INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"), - INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"), - INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"), - INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"), - INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"), - INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), - INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), - INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"), - INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"), - INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"), - INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"), - INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), - INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), - INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), - INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"), - INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"), - INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"), - INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"), - INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"), - INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"), - INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"), - INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"), - INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"), - INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), - INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), - INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), - INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), - INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), - INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"), - INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), - INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), - INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"), - INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), - INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), - INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), - INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), - INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), - INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), - INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), - INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), - INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), - INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), - INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), - INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), - INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), - INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), - INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), - INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), - INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"), - INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), - INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), - INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), - INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"), - INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"), - INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"), - INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), - INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), - INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), - INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), - INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), - INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), - INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), - INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), - INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"), - INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"), - INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"), - INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), - INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), - INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), - INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"), - INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), - INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), - INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), - INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), - INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), - INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), - INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), - INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"), - INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), - INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), - INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), - INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"), - INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"), - INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), - INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"), - INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"), - INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"), - INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), - INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), - INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), - INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), - INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), - INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), - INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), - INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"), - INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"), - INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"), - INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"), - INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"), - INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"), - INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), - INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), - INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), - INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"), - INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), - INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), - INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), - INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), - INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"), - INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"), - INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), - INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), - INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), - INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), - }; -#undef INST - std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { - // If a matcher has more bits in its mask it is more specific, so it - // should come first. - return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count(); - }); - - return table; - } -}; - -} // namespace Tegra::Shader diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h deleted file mode 100644 index e0d7b89c5..000000000 --- a/src/video_core/engines/shader_header.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/bit_field.h" -#include "common/common_funcs.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -enum class OutputTopology : u32 { - PointList = 1, - LineStrip = 6, - TriangleStrip = 7, -}; - -enum class PixelImap : u8 { - Unused = 0, - Constant = 1, - Perspective = 2, - ScreenLinear = 3, -}; - -// Documentation in: -// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html -struct Header { - union { - BitField<0, 5, u32> sph_type; - BitField<5, 5, u32> version; - BitField<10, 4, u32> shader_type; - BitField<14, 1, u32> mrt_enable; - BitField<15, 1, u32> kills_pixels; - BitField<16, 1, u32> does_global_store; - BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; - BitField<26, 1, u32> does_load_or_store; - BitField<27, 1, u32> does_fp64; - BitField<28, 4, u32> stream_out_mask; - } common0; - - union { - BitField<0, 24, u32> shader_local_memory_low_size; - BitField<24, 8, u32> per_patch_attribute_count; - } common1; - - union { - BitField<0, 24, u32> shader_local_memory_high_size; - BitField<24, 8, u32> threads_per_input_primitive; - } common2; - - union { - BitField<0, 24, u32> shader_local_memory_crs_size; - BitField<24, 4, OutputTopology> output_topology; - BitField<28, 4, u32> reserved; - } common3; - - union { - BitField<0, 12, u32> max_output_vertices; - BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. - BitField<20, 4, u32> reserved; - BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. - } common4; - - union { - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - union { - BitField<0, 8, u16> clip_distances; - BitField<8, 1, u16> point_sprite_s; - BitField<9, 1, u16> point_sprite_t; - BitField<10, 1, u16> fog_coordinate; - BitField<12, 1, u16> tessellation_eval_point_u; - BitField<13, 1, u16> tessellation_eval_point_v; - BitField<14, 1, u16> instance_id; - BitField<15, 1, u16> vertex_id; - }; - INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved - INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // OmapColor - INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - } vtg; - - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - - union { - BitField<0, 2, PixelImap> x; - BitField<2, 2, PixelImap> y; - BitField<4, 2, PixelImap> z; - BitField<6, 2, PixelImap> w; - u8 raw; - } imap_generic_vector[32]; - - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved - - struct { - u32 target; - union { - BitField<0, 1, u32> sample_mask; - BitField<1, 1, u32> depth; - BitField<2, 30, u32> reserved; - }; - } omap; - - bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { - const u32 bit = render_target * 4 + component; - return omap.target & (1 << bit); - } - - PixelImap GetPixelImap(u32 attribute) const { - const auto get_index = [this, attribute](u32 index) { - return static_cast( - (imap_generic_vector[attribute].raw >> (index * 2)) & 3); - }; - - std::optional result; - for (u32 component = 0; component < 4; ++component) { - const PixelImap index = get_index(component); - if (index == PixelImap::Unused) { - continue; - } - if (result && result != index) { - LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); - } - result = index; - } - return result.value_or(PixelImap::Unused); - } - } ps; - - std::array raw; - }; - - u64 GetLocalMemorySize() const { - return (common1.shader_local_memory_low_size | - (common2.shader_local_memory_high_size << 24)); - } -}; -static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); - -} // namespace Tegra::Shader diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 7a3660496..588ce6139 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -4,6 +4,9 @@ #include +#include + +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -13,9 +16,142 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +namespace { +vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { + boost::container::small_vector bindings; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + return device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); +} + +vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( + const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout) { + boost::container::small_vector entries; + size_t offset{}; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, + .pipelineLayout = pipeline_layout, + .set = 0, + }); +} +} // Anonymous namespace + +ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + const Shader::Info& info_, vk::ShaderModule spv_module_) + : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, + spv_module(std::move(spv_module_)), + descriptor_set_layout(CreateDescriptorSetLayout(device, info)), + descriptor_allocator(descriptor_pool, *descriptor_set_layout), + pipeline_layout{device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + })}, + descriptor_update_template{ + CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, + pipeline{device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + })} {} + +void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { + u32 enabled_uniforms{}; + for (const auto& desc : info.constant_buffer_descriptors) { + enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; + } + buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); -ComputePipeline::ComputePipeline() = default; + buffer_cache.UnbindComputeStorageBuffers(); + size_t index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true); + ++index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); +} -ComputePipeline::~ComputePipeline() = default; +VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + return descriptor_set; +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 433d8bb3d..dc045d524 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -5,19 +5,52 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKScheduler; -class VKUpdateDescriptorQueue; -class ComputePipeline { +class ComputePipeline : public Pipeline { public: - explicit ComputePipeline(); - ~ComputePipeline(); + explicit ComputePipeline() = default; + explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + const Shader::Info& info, vk::ShaderModule spv_module); + + ComputePipeline& operator=(ComputePipeline&&) noexcept = default; + ComputePipeline(ComputePipeline&&) noexcept = default; + + ComputePipeline& operator=(const ComputePipeline&) = delete; + ComputePipeline(const ComputePipeline&) = delete; + + void ConfigureBufferCache(BufferCache& buffer_cache); + + [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); + + [[nodiscard]] VkPipeline Handle() const noexcept { + return *pipeline; + } + + [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept { + return *pipeline_layout; + } + +private: + VKUpdateDescriptorQueue* update_descriptor_queue; + Shader::Info info; + + vk::ShaderModule spv_module; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ef9fb5910..3bea1ff44 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -19,9 +19,7 @@ constexpr std::size_t SETS_GROW_RATE = 0x20; DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, VkDescriptorSetLayout layout_) : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{descriptor_pool_}, layout{layout_} {} - -DescriptorAllocator::~DescriptorAllocator() = default; + descriptor_pool{&descriptor_pool_}, layout{layout_} {} VkDescriptorSet DescriptorAllocator::Commit() { const std::size_t index = CommitResource(); @@ -29,7 +27,7 @@ VkDescriptorSet DescriptorAllocator::Commit() { } void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); + descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin)); } VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index f892be7be..2501f9967 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -17,8 +17,12 @@ class VKScheduler; class DescriptorAllocator final : public ResourcePool { public: + explicit DescriptorAllocator() = default; explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); - ~DescriptorAllocator() override; + ~DescriptorAllocator() override = default; + + DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; + DescriptorAllocator(DescriptorAllocator&&) noexcept = default; DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; DescriptorAllocator(const DescriptorAllocator&) = delete; @@ -29,8 +33,8 @@ protected: void Allocate(std::size_t begin, std::size_t end) override; private: - VKDescriptorPool& descriptor_pool; - const VkDescriptorSetLayout layout; + VKDescriptorPool* descriptor_pool{}; + VkDescriptorSetLayout layout{}; std::vector descriptors_allocations; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h new file mode 100644 index 000000000..b06288403 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline.h @@ -0,0 +1,36 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Pipeline { +public: + /// Add a reference count to the pipeline + void AddRef() noexcept { + ++ref_count; + } + + [[nodiscard]] bool RemoveRef() noexcept { + --ref_count; + return ref_count == 0; + } + + [[nodiscard]] u64 UsageTick() const noexcept { + return usage_tick; + } + +protected: + u64 usage_tick{}; + +private: + size_t ref_count{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7d0ba1180..4bf3e4819 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,6 +12,8 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/recompiler.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -22,43 +24,105 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; namespace { -size_t StageFromProgram(size_t program) { - return program == 0 ? 0 : program - 1; -} +class Environment final : public Shader::Environment { +public: + explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + + ~Environment() override = default; + + [[nodiscard]] std::optional Analyze(u32 start_address) { + const std::optional size{TryFindSize(start_address)}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast(*size); + return Common::CityHash128(reinterpret_cast(code.data()), code.size()); + } -ShaderType StageFromProgram(Maxwell::ShaderProgram program) { - return static_cast(StageFromProgram(static_cast(program))); -} + [[nodiscard]] size_t ShaderSize() const noexcept { + return read_highest - read_lowest + INST_SIZE; + } -ShaderType GetShaderType(Maxwell::ShaderProgram program) { - switch (program) { - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - default: - UNIMPLEMENTED_MSG("program={}", program); - return ShaderType::Vertex; + [[nodiscard]] u128 ComputeHash() const { + const size_t size{ShaderSize()}; + auto data = std::make_unique(size); + gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash128(reinterpret_cast(data.get()), size); } -} + + u64 ReadInstruction(u32 address) override { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[address / INST_SIZE]; + } + return gpu_memory.Read(program_base + address); + } + + std::array WorkgroupSize() override { + const auto& qmd{kepler_compute.launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + static constexpr size_t INST_SIZE = sizeof(u64); + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + std::optional TryFindSize(u32 start_address) { + GPUVAddr guest_addr = program_base + start_address; + size_t offset = 0; + size_t size = BLOCK_SIZE; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { + const u64 inst = data[i / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + i; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; + } + + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + GPUVAddr program_base; + + u32 read_lowest = 0; + u32 read_highest = 0; + + std::vector code; + u32 cached_lowest = std::numeric_limits::max(); + u32 cached_highest = 0; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -70,35 +134,91 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader() = default; - -Shader::~Shader() = default; - PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, + : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} PipelineCache::~PipelineCache() = default; -ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); - auto& entry = pair->second; - if (!is_cache_miss) { - return *entry; + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + qmd.program_start}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + return nullptr; + } + ShaderInfo* const shader{TryGet(*cpu_shader_addr)}; + if (!shader) { + return CreateComputePipelineWithoutShader(*cpu_shader_addr); + } + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)}; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateComputePipeline(shader); + shader->compute_users.push_back(key); + return &pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + Environment env{kepler_compute, gpu_memory, program_base}; + if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { + // TODO: Load from cache } - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - throw "Bad"; + const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + shader_info->unique_hash = env.ComputeHash(); + shader_info->size_bytes = env.ShaderSize(); + return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, + BuildShader(device, code)}; } -void PipelineCache::OnShaderRemoval(Shader*) {} +ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) { + ShaderInfo shader; + ComputePipeline pipeline{CreateComputePipeline(&shader)}; + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; + shader.compute_users.push_back(key); + pipeline.AddRef(); + + const size_t size_bytes{shader.size_bytes}; + Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); + return &compute_cache.emplace(key, std::move(pipeline)).first->second; +} + +ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const { + const auto& qmd{kepler_compute.launch_description}; + return { + .unique_hash = unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; +} + +void PipelineCache::OnShaderRemoval(ShaderInfo* shader) { + for (const ComputePipelineCacheKey& key : shader->compute_users) { + const auto it = compute_cache.find(key); + ASSERT(it != compute_cache.end()); + + Pipeline& pipeline = it->second; + if (pipeline.RemoveRef()) { + // Wait for the pipeline to be free of GPU usage before destroying it + scheduler.Wait(pipeline.UsageTick()); + compute_cache.erase(it); + } + } +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e3e63340d..eb35abc27 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -36,7 +36,7 @@ class VKUpdateDescriptorQueue; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - GPUVAddr shader; + u128 unique_hash; u32 shared_memory_size; std::array workgroup_size; @@ -67,13 +67,13 @@ struct hash { namespace Vulkan { -class Shader { -public: - explicit Shader(); - ~Shader(); +struct ShaderInfo { + u128 unique_hash{}; + size_t size_bytes{}; + std::vector compute_users; }; -class PipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache final : public VideoCommon::ShaderCache { public: explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, @@ -83,12 +83,18 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~PipelineCache() override; - ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); protected: - void OnShaderRemoval(Shader* shader) final; + void OnShaderRemoval(ShaderInfo* shader) override; private: + ComputePipeline CreateComputePipeline(ShaderInfo* shader); + + ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); + + ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const; + Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -99,13 +105,7 @@ private: VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; - std::unique_ptr null_shader; - std::unique_ptr null_kernel; - - std::array last_shaders{}; - - std::mutex pipeline_cache; - std::unordered_map> compute_cache; + std::unordered_map compute_cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f152297d9..b757454c4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,6 +36,8 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -237,7 +239,26 @@ void RasterizerVulkan::Clear() { } void RasterizerVulkan::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; + if (!pipeline) { + return; + } + std::scoped_lock lock{buffer_cache.mutex}; + update_descriptor_queue.Acquire(); + pipeline->ConfigureBufferCache(buffer_cache); + const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; + + const auto& qmd{kepler_compute.launch_description}; + const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; + const VkPipeline pipeline_handle{pipeline->Handle()}; + const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()}; + scheduler.Record( + [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, + descriptor_set, nullptr); + cmdbuf.Dispatch(dim[0], dim[1], dim[2]); + }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 31017dc2b..3fd03b915 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -21,7 +21,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -150,8 +149,6 @@ private: BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; - GraphicsPipelineCacheKey graphics_key; - TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index a8bf7bda8..2dd514968 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -10,18 +10,16 @@ namespace Vulkan { ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) - : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} - -ResourcePool::~ResourcePool() = default; + : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {} size_t ResourcePool::CommitResource() { // Refresh semaphore to query updated results - master_semaphore.Refresh(); - const u64 gpu_tick = master_semaphore.KnownGpuTick(); + master_semaphore->Refresh(); + const u64 gpu_tick = master_semaphore->KnownGpuTick(); const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional { for (size_t iterator = begin; iterator < end; ++iterator) { if (gpu_tick >= ticks[iterator]) { - ticks[iterator] = master_semaphore.CurrentTick(); + ticks[iterator] = master_semaphore->CurrentTick(); return iterator; } } @@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() { // Both searches failed, the pool is full; handle it. const size_t free_resource = ManageOverflow(); - ticks[free_resource] = master_semaphore.CurrentTick(); + ticks[free_resource] = master_semaphore->CurrentTick(); found = free_resource; } } diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 9d0bb3b4d..f0b80ad59 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -18,8 +18,16 @@ class MasterSemaphore; */ class ResourcePool { public: + explicit ResourcePool() = default; explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); - virtual ~ResourcePool(); + + virtual ~ResourcePool() = default; + + ResourcePool& operator=(ResourcePool&&) noexcept = default; + ResourcePool(ResourcePool&&) noexcept = default; + + ResourcePool& operator=(const ResourcePool&) = default; + ResourcePool(const ResourcePool&) = default; protected: size_t CommitResource(); @@ -34,7 +42,7 @@ private: /// Allocates a new page of resources. void Grow(); - MasterSemaphore& master_semaphore; + MasterSemaphore* master_semaphore{}; size_t grow_step = 0; ///< Number of new resources created after an overflow size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found std::vector ticks; ///< Ticks for each resource -- cgit v1.2.3 From c736b9ffabc8a869d8ed131d365aff21b049f751 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 4 Jun 2021 00:52:40 +0200 Subject: DMA: Restrict optimised path for BlockToLinear further. --- src/video_core/engines/maxwell_dma.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c51776466..c7ec1eac9 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -127,7 +127,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { // Optimized path for micro copies. const size_t dst_size = static_cast(regs.pitch_out) * regs.line_count; - if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) { + if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && + regs.src_params.height > GOB_SIZE_Y) { FastCopyBlockLinearToPitch(); return; } -- cgit v1.2.3 From d554778311c32e0a19ecdc13d7525b264d8443b5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 21:52:04 -0300 Subject: vulkan: Use VK_EXT_provoking_vertex when available --- src/video_core/engines/maxwell_3d.h | 7 +++++- .../renderer_vulkan/fixed_pipeline_state.cpp | 4 ++-- .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 10 +++++++- src/video_core/vulkan_common/vulkan_device.cpp | 28 ++++++++++++++++++++++ src/video_core/vulkan_common/vulkan_device.h | 6 +++++ 6 files changed, 52 insertions(+), 4 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cbf94412b..04d5790f6 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1151,7 +1151,11 @@ public: u32 index; } primitive_restart; - INSERT_PADDING_WORDS_NOINIT(0x5F); + INSERT_PADDING_WORDS_NOINIT(0xE); + + u32 provoking_vertex_last; + + INSERT_PADDING_WORDS_NOINIT(0x50); struct { u32 start_addr_high; @@ -1672,6 +1676,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(primitive_restart, 0x591); +ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 1486d088a..f121fbf0e 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -84,6 +84,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); depth_format.Assign(static_cast(regs.zeta.format)); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); @@ -91,8 +93,6 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 0f1eff9cd..60adae316 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -192,6 +192,7 @@ struct FixedPipelineState { BitField<4, 1, u32> depth_enabled; BitField<5, 5, u32> depth_format; BitField<10, 1, u32> y_negate; + BitField<11, 1, u32> provoking_vertex_last; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5c916c869..06a80c2ba 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -567,9 +567,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { viewport_ci.pNext = &swizzle_ci; } + const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; const VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), @@ -586,6 +593,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2b715baba..618535aae 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -412,6 +412,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + if (ext_provoking_vertex) { + provoking_vertex = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, + .pNext = nullptr, + .provokingVertexLast = VK_TRUE, + .transformFeedbackPreservesProvokingVertex = VK_TRUE, + }; + SetNext(next, provoking_vertex); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); + } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; if (ext_shader_atomic_int64) { atomic_int64 = { @@ -718,6 +731,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; bool has_ext_shader_atomic_int64{}; + bool has_ext_provoking_vertex{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -748,6 +762,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); @@ -799,6 +814,19 @@ std::vector Device::LoadExtensions(bool requires_surface) { } else { is_warp_potentially_bigger = true; } + if (has_ext_provoking_vertex) { + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT; + provoking_vertex.pNext = nullptr; + features.pNext = &provoking_vertex; + physical.GetFeatures2KHR(features); + + if (provoking_vertex.provokingVertexLast && + provoking_vertex.transformFeedbackPreservesProvokingVertex) { + extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + ext_provoking_vertex = true; + } + } if (has_ext_shader_atomic_int64) { VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 9bc1fb947..37f589612 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -244,6 +244,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_provoking_vertex. + bool IsExtProvokingVertexSupported() const { + return ext_provoking_vertex; + } + /// Returns true if the device supports VK_KHR_shader_atomic_int64. bool IsExtShaderAtomicInt64Supported() const { return ext_shader_atomic_int64; @@ -346,6 +351,7 @@ private: bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached -- cgit v1.2.3 From 395bed3a0af90a53be44e81eadd06f4931c8e933 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 02:41:00 -0300 Subject: shader: Unify shader stage types --- src/shader_recompiler/stage.h | 11 +++++++++-- src/video_core/engines/kepler_compute.cpp | 1 - src/video_core/engines/maxwell_3d.cpp | 1 - src/video_core/engines/maxwell_3d.h | 1 - src/video_core/engines/shader_type.h | 21 --------------------- src/video_core/renderer_opengl/gl_device.cpp | 18 ++++++++++-------- src/video_core/renderer_opengl/gl_device.h | 11 ++++++----- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 -- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - src/video_core/renderer_opengl/gl_shader_cache.h | 1 - src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 15 ++++++++------- src/video_core/renderer_vulkan/maxwell_to_vk.h | 3 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 -- src/video_core/shader_environment.cpp | 2 +- 15 files changed, 37 insertions(+), 55 deletions(-) delete mode 100644 src/video_core/engines/shader_type.h (limited to 'src/video_core/engines') diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h index 7d4f2c0bb..5c1c8d8fc 100644 --- a/src/shader_recompiler/stage.h +++ b/src/shader_recompiler/stage.h @@ -9,13 +9,20 @@ namespace Shader { enum class Stage : u32 { - Compute, - VertexA, VertexB, TessellationControl, TessellationEval, Geometry, Fragment, + + Compute, + + VertexA, }; +constexpr u32 MaxStageTypes = 6; + +[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { + return static_cast(static_cast(Stage::VertexB) + index); +} } // namespace Shader diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index cae93c470..492b4c5a3 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 103a51fd0..b18b8a02a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 04d5790f6..fc2c36c6b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -20,7 +20,6 @@ #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/macro/macro.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h deleted file mode 100644 index 49ce5cde5..000000000 --- a/src/video_core/engines/shader_type.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" - -namespace Tegra::Engines { - -enum class ShaderType : u32 { - Vertex = 0, - TesselationControl = 1, - TesselationEval = 2, - Geometry = 3, - Fragment = 4, - Compute = 5, -}; -static constexpr std::size_t MaxShaderTypes = 6; - -} // namespace Tegra::Engines diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b1b5ba1ab..27be347e6 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -17,6 +17,7 @@ #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "shader_recompiler/stage.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -59,16 +60,18 @@ bool HasExtension(std::span extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -std::array BuildMaxUniformBuffers() noexcept { - std::array max; - std::ranges::transform(LIMIT_UBOS, max.begin(), - [](GLenum pname) { return GetInteger(pname); }); +std::array BuildMaxUniformBuffers() noexcept { + std::array max; + std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger); return max; } bool IsASTCSupported() { - static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; - static constexpr std::array formats = { + static constexpr std::array targets{ + GL_TEXTURE_2D, + GL_TEXTURE_2D_ARRAY, + }; + static constexpr std::array formats{ GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, @@ -84,11 +87,10 @@ bool IsASTCSupported() { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, }; - static constexpr std::array required_support = { + static constexpr std::array required_support{ GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, }; - for (const GLenum target : targets) { for (const GLenum format : formats) { for (const GLenum support : required_support) { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 0bd277d38..ad7b01b06 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -6,7 +6,7 @@ #include #include "common/common_types.h" -#include "video_core/engines/shader_type.h" +#include "shader_recompiler/stage.h" namespace OpenGL { @@ -16,8 +16,8 @@ public: [[nodiscard]] std::string GetVendorName() const; - u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { - return max_uniform_buffers[static_cast(shader_type)]; + u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept { + return max_uniform_buffers[static_cast(stage)]; } size_t GetUniformBufferAlignment() const { @@ -148,8 +148,7 @@ private: static bool TestVariableAoffi(); static bool TestPreciseBug(); - std::string vendor_name; - std::array max_uniform_buffers{}; + std::array max_uniform_buffers{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; @@ -181,6 +180,8 @@ private: bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; + + std::string vendor_name; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e3d336f86..0f0d780b5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -23,7 +23,6 @@ #include "core/memory.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_query_cache.h" @@ -40,7 +39,6 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; using GLvec4 = std::array; -using Tegra::Engines::ShaderType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f2f18b18a..5af9b7745 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -26,7 +26,6 @@ #include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 16873fcec..9d5306293 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -17,7 +17,6 @@ #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" -#include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_context.h" diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 8f0b0b8ec..8f9b9a11a 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { +VkShaderStageFlagBits ShaderStage(Shader::Stage stage) { switch (stage) { - case Tegra::Engines::ShaderType::Vertex: + case Shader::Stage::VertexA: + case Shader::Stage::VertexB: return VK_SHADER_STAGE_VERTEX_BIT; - case Tegra::Engines::ShaderType::TesselationControl: + case Shader::Stage::TessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - case Tegra::Engines::ShaderType::TesselationEval: + case Shader::Stage::TessellationEval: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - case Tegra::Engines::ShaderType::Geometry: + case Shader::Stage::Geometry: return VK_SHADER_STAGE_GEOMETRY_BIT; - case Tegra::Engines::ShaderType::Fragment: + case Shader::Stage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; - case Tegra::Engines::ShaderType::Compute: + case Shader::Stage::Compute: return VK_SHADER_STAGE_COMPUTE_BIT; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 50a599c11..8a9616039 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/stage.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" @@ -45,7 +46,7 @@ struct FormatInfo { [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, PixelFormat pixel_format); -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); +VkShaderStageFlagBits ShaderStage(Shader::Stage stage); VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2b59a9d88..9eb353a88 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -737,7 +737,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)), .module = *spv_modules[stage], .pName = "main", .pSpecializationInfo = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c57e16c50..f04c3394c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -58,8 +58,6 @@ struct DrawParams { bool is_indexed; }; -constexpr auto COMPUTE_SHADER_INDEX = static_cast(Tegra::Engines::ShaderType::Compute); - VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 6243cd176..d463e2b56 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 3; +constexpr u32 CACHE_VERSION = 4; constexpr size_t INST_SIZE = sizeof(u64); -- cgit v1.2.3 From 4f052a1f393d45843eabc237e21757be15f20062 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 03:32:41 -0300 Subject: vk_graphics_pipeline: Implement conservative rendering --- src/video_core/engines/maxwell_3d.h | 7 ++++- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 33 ++++++++++++++++------ src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++ src/video_core/vulkan_common/vulkan_device.h | 6 ++++ 6 files changed, 44 insertions(+), 10 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index fc2c36c6b..da2ded671 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -910,7 +910,11 @@ public: u32 fill_rectangle; - INSERT_PADDING_WORDS_NOINIT(0x8); + INSERT_PADDING_WORDS_NOINIT(0x2); + + u32 conservative_raster_enable; + + INSERT_PADDING_WORDS_NOINIT(0x5); std::array vertex_attrib_format; @@ -1615,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(render_area, 0x3FD); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); +ASSERT_REG_POSITION(conservative_raster_enable, 0x452); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); ASSERT_REG_POSITION(multisample_sample_locations, 0x478); ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 16cef8711..7563dc462 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -87,6 +87,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, depth_format.Assign(static_cast(regs.zeta.format)); y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); + conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 04f34eb97..66b57b636 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -194,6 +194,7 @@ struct FixedPipelineState { BitField<6, 5, u32> depth_format; BitField<11, 1, u32> y_negate; BitField<12, 1, u32> provoking_vertex_last; + BitField<13, 1, u32> conservative_raster_enable; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9eb353a88..70e183e65 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -599,16 +599,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pScissors = nullptr, }; - const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .provokingVertexMode = key.state.provoking_vertex_last != 0 - ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT - : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, - }; - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, + .pNext = nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), @@ -625,6 +618,28 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .conservativeRasterizationMode = key.state.conservative_raster_enable != 0 + ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT + : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, + .extraPrimitiveOverestimationSize = 0.0f, + }; + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; + if (device.IsExtConservativeRasterizationSupported()) { + conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); + } + if (device.IsExtProvokingVertexSupported()) { + provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex); + } const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9d918de8d..7b184d2f8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -412,6 +412,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + if (!ext_conservative_rasterization) { + LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); + } + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; if (ext_provoking_vertex) { provoking_vertex = { @@ -776,6 +780,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { true); test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); + test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, + true); test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 40d00a52f..a9c0a0e4d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -264,6 +264,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_conservative_rasterization. + bool IsExtConservativeRasterizationSupported() const { + return ext_conservative_rasterization; + } + /// Returns true if the device supports VK_EXT_provoking_vertex. bool IsExtProvokingVertexSupported() const { return ext_provoking_vertex; @@ -374,6 +379,7 @@ private: bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached -- cgit v1.2.3 From 7dafa96ab59892b7f1fbffdb61e4326e6443955f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 02:41:09 -0300 Subject: shader: Rework varyings and implement passthrough geometry shaders Put all varyings into a single std::bitset with helpers to access it. Implement passthrough geometry shaders using host's. --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/glasm/emit_context.cpp | 15 +- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 6 +- .../backend/glasm/emit_glasm_context_get_set.cpp | 6 +- .../backend/glsl/emit_context.cpp | 58 +++--- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 2 +- .../backend/glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_special.cpp | 4 +- .../backend/spirv/emit_context.cpp | 97 +++++----- src/shader_recompiler/backend/spirv/emit_context.h | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 19 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 2 +- src/shader_recompiler/environment.h | 5 + src/shader_recompiler/frontend/ir/attribute.h | 6 + src/shader_recompiler/frontend/ir/program.h | 1 + .../frontend/maxwell/translate_program.cpp | 18 +- .../ir_opt/collect_shader_info_pass.cpp | 202 +++++++-------------- src/shader_recompiler/profile.h | 1 + src/shader_recompiler/program_header.h | 62 +++---- src/shader_recompiler/runtime_info.h | 3 +- src/shader_recompiler/shader_info.h | 37 +--- src/shader_recompiler/varying_state.h | 69 +++++++ src/video_core/engines/maxwell_3d.h | 7 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 6 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 16 +- src/video_core/shader_environment.cpp | 10 +- src/video_core/vulkan_common/vulkan_device.cpp | 6 + src/video_core/vulkan_common/vulkan_device.h | 6 + 29 files changed, 345 insertions(+), 331 deletions(-) create mode 100644 src/shader_recompiler/varying_state.h (limited to 'src/video_core/engines') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 3b5708cb9..b5b7e5e83 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -229,6 +229,7 @@ add_library(shader_recompiler STATIC program_header.h runtime_info.h shader_info.h + varying_state.h ) target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 21e14867c..80dad9ff3 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -83,14 +83,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (generic.used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.loads.Generic(index)) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", - InterpDecorator(generic.interpolation), index, attr_stage, index, index); + InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); } } - if (IsInputArray(stage) && info.loads_position) { + if (IsInputArray(stage) && info.loads.AnyComponent(IR::Attribute::PositionX)) { Add("ATTRIB vertex_position=vertex.position;"); } if (info.uses_invocation_id) { @@ -102,7 +101,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (info.stores_tess_level_inner) { Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { @@ -124,8 +123,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 79314f130..2b96977b3 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -296,8 +296,10 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (info.uses_sparse_residency) { header += "OPTION EXT_sparse_texture2;"; } - if (((info.stores_viewport_index || info.stores_layer) && stage != Stage::Geometry) || - info.stores_viewport_mask) { + const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || + info.stores[IR::Attribute::Layer]}; + if ((stage != Stage::Geometry && stores_viewport_layer) || + info.stores[IR::Attribute::ViewportMask]) { if (profile.support_viewport_index_layer_non_geometry) { header += "OPTION NV_viewport_array2;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index bc195d248..02c9dc6d7 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -261,7 +261,7 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, fmt::format("{}.z", value), fmt::format("{}.w", value)}; read(compare_index, values); }}; - if (ctx.info.loads_position) { + if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { const u32 index{static_cast(IR::Attribute::PositionX)}; if (IsInputArray(ctx.stage)) { read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); @@ -269,8 +269,8 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); } } - for (u32 index = 0; index < ctx.info.input_generics.size(); ++index) { - if (!ctx.info.input_generics[index].used) { + for (u32 index = 0; index < static_cast(IR::NUM_GENERICS); ++index) { + if (!ctx.info.loads.Generic(index)) { continue; } read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 14c009535..0d7f7bc3b 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -212,22 +212,22 @@ std::string_view OutputPrimitive(OutputTopology topology) { } void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { - if (!ctx.info.stores_legacy_varyings) { + if (!ctx.info.stores.Legacy()) { return; } - if (ctx.info.stores_fixed_fnc_textures) { + if (ctx.info.stores.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.stores_color_front_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_FrontColor;"; } - if (ctx.info.stores_color_front_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { header += "vec4 gl_FrontSecondaryColor;"; } - if (ctx.info.stores_color_back_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { header += "vec4 gl_BackColor;"; } - if (ctx.info.stores_color_back_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { header += "vec4 gl_BackSecondaryColor;"; } } @@ -237,32 +237,32 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { return; } header += "out gl_PerVertex{vec4 gl_Position;"; - if (ctx.info.stores_point_size) { + if (ctx.info.stores[IR::Attribute::PointSize]) { header += "float gl_PointSize;"; } - if (ctx.info.stores_clip_distance) { + if (ctx.info.stores.ClipDistances()) { header += "float gl_ClipDistance[];"; } - if (ctx.info.stores_viewport_index && ctx.profile.support_viewport_index_layer_non_geometry && - ctx.stage != Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && + ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } SetupLegacyOutPerVertex(ctx, header); header += "};"; - if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { - if (!ctx.info.loads_legacy_varyings) { + if (!ctx.info.loads.Legacy()) { return; } header += "in gl_PerFragment{"; - if (ctx.info.loads_fixed_fnc_textures) { + if (ctx.info.loads.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.loads_color_front_diffuse) { + if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_Color;"; } header += "};"; @@ -325,14 +325,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile SetupOutPerVertex(*this, header); SetupLegacyInPerFragment(*this, header); - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (!generic.used || !runtime_info.previous_stage_stores_generic[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { continue; } - header += - fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, - InterpDecorator(generic.interpolation), index, InputArrayDecorator(stage)); + header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, + InterpDecorator(info.interpolation[index]), index, + InputArrayDecorator(stage)); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -349,11 +348,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (!info.stores_generics[index]) { - continue; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(index, program.invocations); } - DefineGenericOutput(index, program.invocations); } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); @@ -398,14 +396,14 @@ void EmitContext::SetupExtensions() { header += "#extension GL_NV_shader_thread_shuffle : enable\n"; } } - if ((info.stores_viewport_index || info.stores_layer) && + if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { header += "#extension GL_ARB_sparse_texture2 : enable\n"; } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { header += "#extension GL_NV_viewport_array2 : enable\n"; } if (info.uses_typeless_image_reads) { @@ -535,20 +533,20 @@ void EmitContext::DefineHelperFunctions() { fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " "masked_index=uint(base_index)&3u;switch(base_index>>2){{", vertex_arg)}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { const auto position_idx{is_array ? "gl_in[vertex]." : ""}; func += fmt::format("case {}:return {}{}[masked_index];", static_cast(IR::Attribute::PositionX) >> 2, position_idx, position_name); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } const auto vertex_idx{is_array ? "[vertex]" : ""}; func += fmt::format("case {}:return in_attr{}{}[masked_index];", - base_attribute_value + i, i, vertex_idx); + base_attribute_value + index, index, vertex_idx); } func += "default: return 0.0;}}"; header += func; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 32c4f1da2..8deaf5760 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -171,7 +171,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } std::string GlslVersionSpecifier(const EmitContext& ctx) { - if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings || ctx.info.loads_legacy_varyings) { + if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) { return " compatibility"; } return ""; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 3d2ba2eee..16e2a8502 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -179,7 +179,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - if (!ctx.runtime_info.previous_stage_stores_generic[index]) { + if (!ctx.runtime_info.previous_stage_stores.Generic(index)) { ctx.AddF32("{}=0.f;", inst, attr); return; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 6420aaa21..298881c7b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -20,8 +20,8 @@ void InitializeOutputVaryings(EmitContext& ctx) { if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { ctx.Add("gl_Position=vec4(0,0,0,1);"); } - for (size_t index = 0; index < ctx.info.stores_generics.size(); ++index) { - if (!ctx.info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!ctx.info.stores.Generic(index)) { continue; } const auto& info_array{ctx.output_generics.at(index)}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4c6501129..af4fb0c69 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -557,7 +557,7 @@ void EmitContext::DefineCommonConstants() { } void EmitContext::DefineInterfaces(const IR::Program& program) { - DefineInputs(program.info); + DefineInputs(program); DefineOutputs(program); } @@ -693,16 +693,16 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < static_cast(IR::NUM_GENERICS); ++index) { + if (!info.loads.Generic(index)) { continue; } - literals.push_back(base_attribute_value + i); + literals.push_back(base_attribute_value + index); labels.push_back(OpLabel()); } OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); @@ -710,7 +710,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturnValue(Const(0.0f)); size_t label_index{0}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{is_array ? OpAccessChain(input_f32, input_position, vertex, masked_index) @@ -719,18 +719,18 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpReturnValue(result); ++label_index; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - if (!info.input_generics[i].used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } AddLabel(labels[label_index]); - const auto type{AttrTypes(*this, static_cast(i))}; + const auto type{AttrTypes(*this, static_cast(index))}; if (!type) { OpReturnValue(Const(0.0f)); ++label_index; continue; } - const Id generic_id{input_generics.at(i)}; + const Id generic_id{input_generics.at(index)}; const Id pointer{is_array ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) : OpAccessChain(type->pointer, generic_id, masked_index)}; @@ -758,19 +758,19 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - literals.push_back(base_attribute_value + static_cast(i)); + literals.push_back(base_attribute_value + static_cast(index)); labels.push_back(OpLabel()); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { literals.push_back(static_cast(IR::Attribute::ClipDistance0) >> 2); labels.push_back(OpLabel()); literals.push_back(static_cast(IR::Attribute::ClipDistance4) >> 2); @@ -781,28 +781,28 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturn(); size_t label_index{0}; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - for (size_t i = 0; i < info.stores_generics.size(); ++i) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - if (output_generics[i][0].num_components != 4) { + if (output_generics[index][0].num_components != 4) { throw NotImplementedException("Physical stores and transform feedbacks"); } AddLabel(labels[label_index]); - const Id generic_id{output_generics[i][0].id}; + const Id generic_id{output_generics[index][0].id}; const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; OpStore(pointer, store_value); @@ -1146,7 +1146,10 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { } } -void EmitContext::DefineInputs(const Info& info) { +void EmitContext::DefineInputs(const IR::Program& program) { + const Info& info{program.info}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); } @@ -1183,15 +1186,20 @@ void EmitContext::DefineInputs(const Info& info) { fswzadd_lut_b = ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); } - if (info.loads_primitive_id) { + if (loads[IR::Attribute::PrimitiveId]) { primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); } - if (info.loads_position) { + if (loads.AnyComponent(IR::Attribute::PositionX)) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; input_position = DefineInput(*this, F32[4], true, built_in); + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } + } } - if (info.loads_instance_id) { + if (loads[IR::Attribute::InstanceId]) { if (profile.support_vertex_instance_id) { instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); } else { @@ -1199,7 +1207,7 @@ void EmitContext::DefineInputs(const Info& info) { base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } } - if (info.loads_vertex_id) { + if (loads[IR::Attribute::VertexId]) { if (profile.support_vertex_instance_id) { vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); } else { @@ -1207,24 +1215,24 @@ void EmitContext::DefineInputs(const Info& info) { base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } } - if (info.loads_front_face) { + if (loads[IR::Attribute::FrontFace]) { front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); } - if (info.loads_point_coord) { + if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) { point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); } - if (info.loads_tess_coord) { + if (loads[IR::Attribute::TessellationEvaluationPointU] || + loads[IR::Attribute::TessellationEvaluationPointV]) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } - for (size_t index = 0; index < info.input_generics.size(); ++index) { - if (!runtime_info.previous_stage_stores_generic[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const AttributeType input_type{runtime_info.generic_input_types[index]}; + if (!runtime_info.previous_stage_stores.Generic(index)) { continue; } - const InputVarying generic{info.input_generics[index]}; - if (!generic.used) { + if (!loads.Generic(index)) { continue; } - const AttributeType input_type{runtime_info.generic_input_types[index]}; if (input_type == AttributeType::Disabled) { continue; } @@ -1234,10 +1242,13 @@ void EmitContext::DefineInputs(const Info& info) { Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; + if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { + Decorate(id, spv::Decoration::PassthroughNV); + } if (stage != Stage::Fragment) { continue; } - switch (generic.interpolation) { + switch (info.interpolation[index]) { case Interpolation::Smooth: // Default // Decorate(id, spv::Decoration::Smooth); @@ -1266,42 +1277,42 @@ void EmitContext::DefineInputs(const Info& info) { void EmitContext::DefineOutputs(const IR::Program& program) { const Info& info{program.info}; const std::optional invocations{program.invocations}; - if (info.stores_position || stage == Stage::VertexB) { + if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } - if (info.stores_point_size || runtime_info.fixed_state_point_size) { + if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } const Id type{TypeArray(F32[1], Const(8U))}; clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } - if (info.stores_layer && + if (info.stores[IR::Attribute::Layer] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing Layer in fragment stage"); } layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); } - if (info.stores_viewport_index && + if (info.stores[IR::Attribute::ViewportIndex] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ViewportIndex in fragment stage"); } viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, spv::BuiltIn::ViewportMaskNV); } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { DefineGenericOutput(*this, index, invocations); } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 527685fb8..e277bc358 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -300,7 +300,7 @@ private: void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); - void DefineInputs(const Info& info); + void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 278c262f8..ddb86d070 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -281,11 +281,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); break; } - if (program.info.stores_point_size) { + if (program.info.stores[IR::Attribute::PointSize]) { ctx.AddCapability(spv::Capability::GeometryPointSize); } ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); + if (program.is_geometry_passthrough) { + if (ctx.profile.support_geometry_shader_passthrough) { + ctx.AddExtension("SPV_NV_geometry_shader_passthrough"); + ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV); + } else { + LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used with no support"); + } + } break; case Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; @@ -377,20 +385,21 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } - if (info.stores_viewport_index) { + if (info.stores[IR::Attribute::ViewportIndex]) { ctx.AddCapability(spv::Capability::MultiViewport); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { ctx.AddExtension("SPV_NV_viewport_array2"); ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); } - if (info.stores_layer || info.stores_viewport_index) { + if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) { if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); } } - if (!profile.support_vertex_instance_id && (info.loads_instance_id || info.loads_vertex_id)) { + if (!profile.support_vertex_instance_id && + (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 85bd72389..77fbb2b2f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -298,7 +298,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; - if (!type || !ctx.runtime_info.previous_stage_stores_generic[index]) { + if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index)) { // Attribute is disabled return ctx.Const(0.0f); } diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 090bc1c08..8369d0d84 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -31,6 +31,10 @@ public: return sph; } + [[nodiscard]] const std::array& GpPassthroughMask() const noexcept { + return gp_passthrough_mask; + } + [[nodiscard]] Stage ShaderStage() const noexcept { return stage; } @@ -41,6 +45,7 @@ public: protected: ProgramHeader sph{}; + std::array gp_passthrough_mask{}; Stage stage{}; u32 start_address{}; }; diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 8bf2ddf30..ca1199494 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -222,6 +222,8 @@ enum class Attribute : u64 { FrontFace = 255, }; +constexpr size_t NUM_GENERICS = 32; + [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); @@ -230,6 +232,10 @@ enum class Attribute : u64 { [[nodiscard]] std::string NameOf(Attribute attribute); +[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept { + return static_cast(static_cast(attribute) + value); +} + } // namespace Shader::IR template <> diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 9ede5b48d..ebcaa8bc2 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -27,6 +27,7 @@ struct Program { u32 invocations{}; u32 local_memory_size{}; u32 shared_memory_size{}; + bool is_geometry_passthrough{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index a8b727f1a..6b4b0ce5b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -46,7 +46,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { return; } const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { std::optional imap; for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { if (value == PixelImap::Unused) { @@ -60,7 +60,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { if (!imap) { continue; } - program.info.input_generics[index].interpolation = [&] { + program.info.interpolation[index] = [&] { switch (*imap) { case PixelImap::Unused: case PixelImap::Perspective: @@ -140,6 +140,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + } break; } case Stage::Compute: @@ -194,12 +199,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - if (vertex_b.info.stores_generics[index]) { - result.info.stores_generics[index] = true; - } - } + result.info.loads.mask |= vertex_b.info.loads.mask; + result.info.stores.mask |= vertex_b.info.stores.mask; + Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DeadCodeEliminationPass(result); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a82472152..5e32ac784 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -29,130 +29,6 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { }); } -void GetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.input_generics.at(IR::GenericAttributeIndex(attr)).used = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.loads_fixed_fnc_textures = true; - info.loads_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::PrimitiveId: - info.loads_primitive_id = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.loads_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.loads_color_front_diffuse = true; - info.loads_legacy_varyings = true; - break; - case IR::Attribute::PointSpriteS: - case IR::Attribute::PointSpriteT: - info.loads_point_coord = true; - break; - case IR::Attribute::TessellationEvaluationPointU: - case IR::Attribute::TessellationEvaluationPointV: - info.loads_tess_coord = true; - break; - case IR::Attribute::InstanceId: - info.loads_instance_id = true; - break; - case IR::Attribute::VertexId: - info.loads_vertex_id = true; - break; - case IR::Attribute::FrontFace: - info.loads_front_face = true; - break; - default: - throw NotImplementedException("Get attribute {}", attr); - } -} - -void SetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.stores_generics[IR::GenericAttributeIndex(attr)] = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.stores_fixed_fnc_textures = true; - info.stores_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::Layer: - info.stores_layer = true; - break; - case IR::Attribute::ViewportIndex: - info.stores_viewport_index = true; - break; - case IR::Attribute::PointSize: - info.stores_point_size = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.stores_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.stores_color_front_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorFrontSpecularR: - case IR::Attribute::ColorFrontSpecularG: - case IR::Attribute::ColorFrontSpecularB: - case IR::Attribute::ColorFrontSpecularA: - info.stores_color_front_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackDiffuseR: - case IR::Attribute::ColorBackDiffuseG: - case IR::Attribute::ColorBackDiffuseB: - case IR::Attribute::ColorBackDiffuseA: - info.stores_color_back_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackSpecularR: - case IR::Attribute::ColorBackSpecularG: - case IR::Attribute::ColorBackSpecularB: - case IR::Attribute::ColorBackSpecularA: - info.stores_color_back_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ClipDistance0: - case IR::Attribute::ClipDistance1: - case IR::Attribute::ClipDistance2: - case IR::Attribute::ClipDistance3: - case IR::Attribute::ClipDistance4: - case IR::Attribute::ClipDistance5: - case IR::Attribute::ClipDistance6: - case IR::Attribute::ClipDistance7: - info.stores_clip_distance = true; - break; - case IR::Attribute::FogCoordinate: - info.stores_fog_coordinate = true; - break; - case IR::Attribute::ViewportMask: - info.stores_viewport_mask = true; - break; - default: - throw NotImplementedException("Set attribute {}", attr); - } -} - void GetPatch(Info& info, IR::Patch patch) { if (!IR::IsGeneric(patch)) { throw NotImplementedException("Reading non-generic patch {}", patch); @@ -511,10 +387,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.uses_demote_to_helper_invocation = true; break; case IR::Opcode::GetAttribute: - GetAttribute(info, inst.Arg(0).Attribute()); + info.loads.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::SetAttribute: - SetAttribute(info, inst.Arg(0).Attribute()); + info.stores.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::GetPatch: GetPatch(info, inst.Arg(0).Patch()); @@ -943,26 +819,78 @@ void GatherInfoFromHeader(Environment& env, Info& info) { if (!info.loads_indexed_attributes) { return; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.ps.IsGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const size_t offset{static_cast(IR::Attribute::Generic0X) + index * 4}; + const auto vector{header.ps.imap_generic_vector[index]}; + info.loads.mask[offset + 0] = vector.x != PixelImap::Unused; + info.loads.mask[offset + 1] = vector.y != PixelImap::Unused; + info.loads.mask[offset + 2] = vector.z != PixelImap::Unused; + info.loads.mask[offset + 3] = vector.w != PixelImap::Unused; } - info.loads_position |= header.ps.imap_systemb.position != 0; return; } if (info.loads_indexed_attributes) { - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.vtg.IsInputGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask = header.vtg.InputGeneric(index); + for (size_t i = 0; i < 4; ++i) { + info.loads.Set(attribute + i, mask[i]); + } + } + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.clip_distances}; + info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); } - info.loads_position |= header.vtg.imap_systemb.position != 0; + info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0); + info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0); + info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0); + info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0); + info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0); + info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0); + info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0); + info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0); + info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0); + info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0); + info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.tessellation_eval_point_u != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.tessellation_eval_point_v != 0); + info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0); + info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0); + // TODO: Legacy varyings } if (info.stores_indexed_attributes) { - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (header.vtg.IsOutputGenericVectorActive(i)) { - info.stores_generics[i] = true; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask{header.vtg.OutputGeneric(index)}; + for (size_t i = 0; i < 4; ++i) { + info.stores.Set(attribute + i, mask[i]); } } - info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; - info.stores_position |= header.vtg.omap_systemb.position != 0; + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.omap_systemc.clip_distances}; + info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.stores.Set(IR::Attribute::PrimitiveId, + header.vtg.omap_systemb.primitive_array_id != 0); + info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0); + info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0); + info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0); + info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0); + info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0); + info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0); + info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0); + info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0); + info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0); + info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.omap_systemc.tessellation_eval_point_u != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.omap_systemc.tessellation_eval_point_v != 0); + info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0); + info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0); + // TODO: Legacy varyings } } } // Anonymous namespace diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index d46be1638..ee1887b56 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -34,6 +34,7 @@ struct Profile { bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; bool support_derivative_control{}; + bool support_geometry_shader_passthrough{}; bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h index 6933750aa..bd6c2bfb5 100644 --- a/src/shader_recompiler/program_header.h +++ b/src/shader_recompiler/program_header.h @@ -37,7 +37,9 @@ struct ProgramHeader { BitField<15, 1, u32> kills_pixels; BitField<16, 1, u32> does_global_store; BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; + BitField<21, 2, u32> reserved1; + BitField<24, 1, u32> geometry_passthrough; + BitField<25, 1, u32> reserved2; BitField<26, 1, u32> does_load_or_store; BitField<27, 1, u32> does_fp64; BitField<28, 4, u32> stream_out_mask; @@ -79,24 +81,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } imap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } imap_generic_vector[16]; + std::array imap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // ImapColor union { @@ -122,24 +110,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } omap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } omap_generic_vector[16]; + std::array omap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // OmapColor @@ -157,18 +131,24 @@ struct ProgramHeader { INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - [[nodiscard]] bool IsInputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return imap_generic_vector[index >> 1].first != 0; - } - return imap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array InputGeneric(size_t index) const noexcept { + const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } - [[nodiscard]] bool IsOutputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return omap_generic_vector[index >> 1].first != 0; - } - return omap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array OutputGeneric(size_t index) const noexcept { + const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } } vtg; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 63fe2afaf..f3f83a258 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -10,6 +10,7 @@ #include #include "common/common_types.h" +#include "shader_recompiler/varying_state.h" namespace Shader { @@ -60,7 +61,7 @@ struct TransformFeedbackVarying { struct RuntimeInfo { std::array generic_input_types{}; - std::bitset<32> previous_stage_stores_generic{}; + VaryingState previous_stage_stores; bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index a20e15d2e..4ef4dbd40 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/varying_state.h" #include #include @@ -44,11 +45,6 @@ enum class Interpolation { NoPerspective, }; -struct InputVarying { - Interpolation interpolation{Interpolation::Smooth}; - bool used{false}; -}; - struct ConstantBufferDescriptor { u32 index; u32 count; @@ -121,18 +117,10 @@ struct Info { bool uses_subgroup_shuffles{}; std::array uses_patches{}; - std::array input_generics{}; - bool loads_primitive_id{}; - bool loads_position{}; - bool loads_color_front_diffuse{}; - bool loads_fixed_fnc_textures{}; - bool loads_point_coord{}; - bool loads_instance_id{}; - bool loads_vertex_id{}; - bool loads_front_face{}; - bool loads_legacy_varyings{}; - - bool loads_tess_coord{}; + std::array interpolation{}; + VaryingState loads; + VaryingState stores; + VaryingState passthrough; bool loads_indexed_attributes{}; @@ -140,21 +128,6 @@ struct Info { bool stores_sample_mask{}; bool stores_frag_depth{}; - std::bitset<32> stores_generics{}; - bool stores_layer{}; - bool stores_viewport_index{}; - bool stores_point_size{}; - bool stores_position{}; - bool stores_color_front_diffuse{}; - bool stores_color_front_specular{}; - bool stores_color_back_diffuse{}; - bool stores_color_back_specular{}; - bool stores_fixed_fnc_textures{}; - bool stores_clip_distance{}; - bool stores_fog_coordinate{}; - bool stores_viewport_mask{}; - bool stores_legacy_varyings{}; - bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h new file mode 100644 index 000000000..9d7b24a76 --- /dev/null +++ b/src/shader_recompiler/varying_state.h @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "shader_recompiler/frontend/ir/attribute.h" + +namespace Shader { + +struct VaryingState { + std::bitset<256> mask{}; + + void Set(IR::Attribute attribute, bool state = true) { + mask[static_cast(attribute)] = state; + } + + [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept { + return mask[static_cast(attribute)]; + } + + [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept { + return mask[static_cast(base) + 0] || mask[static_cast(base) + 1] || + mask[static_cast(base) + 2] || mask[static_cast(base) + 3]; + } + + [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept { + return mask[static_cast(base) + 0] && mask[static_cast(base) + 1] && + mask[static_cast(base) + 2] && mask[static_cast(base) + 3]; + } + + [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept { + return AnyComponent(base) == AllComponents(base); + } + + [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept { + return mask[static_cast(IR::Attribute::Generic0X) + index * 4 + component]; + } + + [[nodiscard]] bool Generic(size_t index) const noexcept { + return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3); + } + + [[nodiscard]] bool ClipDistances() const noexcept { + return AnyComponent(IR::Attribute::ClipDistance0) || + AnyComponent(IR::Attribute::ClipDistance4); + } + + [[nodiscard]] bool Legacy() const noexcept { + return AnyComponent(IR::Attribute::ColorFrontDiffuseR) || + AnyComponent(IR::Attribute::ColorFrontSpecularR) || + AnyComponent(IR::Attribute::ColorBackDiffuseR) || + AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture(); + } + + [[nodiscard]] bool FixedFunctionTexture() const noexcept { + for (size_t index = 0; index < 10; ++index) { + if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + return true; + } + } + return false; + } +}; + +} // namespace Shader diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index da2ded671..471d5686a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -961,7 +961,11 @@ public: SamplerIndex sampler_index; - INSERT_PADDING_WORDS_NOINIT(0x25); + INSERT_PADDING_WORDS_NOINIT(0x2); + + std::array gp_passthrough_mask; + + INSERT_PADDING_WORDS_NOINIT(0x1B); u32 depth_test_enable; @@ -1628,6 +1632,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); ASSERT_REG_POSITION(zeta_depth, 0x48c); ASSERT_REG_POSITION(sampler_index, 0x48D); +ASSERT_REG_POSITION(gp_passthrough_mask, 0x490); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5af9b7745..06e39a503 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -61,10 +61,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + // Mark all stores as available for vertex shaders + info.previous_stage_stores.mask.set(); } switch (program.stage) { case Shader::Stage::VertexB: @@ -187,6 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), + .support_geometry_shader_passthrough = false, // TODO .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 70e183e65..6d664ed6b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -487,10 +487,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector vertex_binding_divisors; static_vector vertex_attributes; if (key.state.dynamic_vertex_input) { - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const u32 type = key.state.DynamicAttributeType(index); - if (!input_attributes[index].used || type == 0) { + if (!stage_infos[0].loads.Generic(index) || type == 0) { continue; } vertex_attributes.push_back({ @@ -526,10 +525,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }); } } - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { + if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ec06b124f..7aaa40ef2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -123,18 +123,21 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde return Shader::AttributeType::Disabled; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(std::span programs, + const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program, const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; + if (previous_program->is_geometry_passthrough) { + info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; + } } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + info.previous_stage_stores.mask.set(); } const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; + const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { @@ -302,6 +305,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .support_derivative_control = true, + .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), @@ -518,7 +522,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; + const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index d463e2b56..429cab30d 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 4; +constexpr u32 CACHE_VERSION = 5; constexpr size_t INST_SIZE = sizeof(u64); @@ -155,6 +155,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.write(reinterpret_cast(&gp_passthrough_mask), + sizeof(gp_passthrough_mask)); + } } } @@ -202,6 +206,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -319,6 +324,9 @@ void FileEnvironment::Deserialize(std::ifstream& file) { .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.read(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.read(reinterpret_cast(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); + } } } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7b184d2f8..da4721e6b 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -350,6 +350,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); } + if (!nv_geometry_shader_passthrough) { + LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -768,6 +772,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); + test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, + true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index a9c0a0e4d..d0adc0127 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -194,6 +194,11 @@ public: return nv_viewport_array2; } + /// Returns true if the device supports VK_NV_geometry_shader_passthrough. + bool IsNvGeometryShaderPassthroughSupported() const { + return nv_geometry_shader_passthrough; + } + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -363,6 +368,7 @@ private: bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. + bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. -- cgit v1.2.3 From fba6bd92d456b4d472ed37e663006fafeef154a9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Jun 2021 17:46:01 -0300 Subject: vk_rasterizer: Workaround bug in VK_EXT_vertex_input_dynamic_state Workaround potential bug on Nvidia's driver where only updating high attributes leaves low attributes out dated. --- src/video_core/engines/maxwell_3d.h | 4 --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 31 +++++++++++++--------- 4 files changed, 20 insertions(+), 19 deletions(-) (limited to 'src/video_core/engines') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 471d5686a..1aa43523a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -305,10 +305,6 @@ public: return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); } - bool IsConstant() const { - return constant; - } - bool IsValid() const { return size != Size::Invalid; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0f0d780b5..41d2b73f4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -97,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() { const auto gl_index = static_cast(index); // Disable constant attributes. - if (attrib.IsConstant()) { + if (attrib.constant) { glDisableVertexAttribArray(gl_index); continue; } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d089da8a4..d70153df3 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -128,7 +128,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const auto& input = regs.vertex_attrib_format[index]; auto& attribute = attributes[index]; attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.enabled.Assign(input.constant ? 0 : 1); attribute.buffer.Assign(input.buffer); attribute.offset.Assign(input.offset); attribute.type.Assign(static_cast(input.type.Value())); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bb7301c53..99576b826 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -801,25 +801,30 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) boost::container::static_vector bindings; boost::container::static_vector attributes; + // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up + // generating dirty state. Track the highest dirty attribute and update all attributes until + // that one. + size_t highest_dirty_attr{}; for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - if (!dirty[Dirty::VertexAttribute0 + index]) { - continue; + if (dirty[Dirty::VertexAttribute0 + index]) { + highest_dirty_attr = index; } + } + for (size_t index = 0; index < highest_dirty_attr; ++index) { const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; const u32 binding{attribute.buffer}; dirty[Dirty::VertexAttribute0 + index] = false; dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; - - attributes.push_back({ - .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, - .pNext = nullptr, - .location = static_cast(index), - .binding = binding, - .format = attribute.IsConstant() - ? VK_FORMAT_A8B8G8R8_UNORM_PACK32 - : MaxwellToVK::VertexFormat(attribute.type, attribute.size), - .offset = attribute.offset, - }); + if (!attribute.constant) { + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast(index), + .binding = binding, + .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } } for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { if (!dirty[Dirty::VertexBinding0 + index]) { -- cgit v1.2.3