From 6db69990da9f232e6d982cdcb69c2e27d93075cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Feb 2021 18:10:18 -0300 Subject: spirv: Add lower fp16 to fp32 pass --- src/video_core/vulkan_common/vulkan_device.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f214510da..85f903125 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -247,9 +247,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderStorageImageArrayDynamicIndexing = false, .shaderClipDistance = false, .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, + .shaderFloat64 = true, + .shaderInt64 = true, + .shaderInt16 = true, .shaderResourceResidency = false, .shaderResourceMinLod = false, .sparseBinding = false, @@ -420,8 +420,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - is_float16_supported = false; + // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); + // is_float16_supported = false; } graphics_queue = logical.GetQueue(graphics_family); -- cgit v1.2.3 From e2bc05b17d91854cbb9c0ce3647141bf7d33143e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Feb 2021 03:30:13 -0300 Subject: shader: Add denorm flush support --- src/shader_recompiler/CMakeLists.txt | 5 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 63 +++++++++++++++++-- src/shader_recompiler/backend/spirv/emit_spirv.h | 4 +- .../backend/spirv/emit_spirv_floating_point.cpp | 6 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 32 +++++----- src/shader_recompiler/frontend/ir/ir_emitter.h | 8 +-- src/shader_recompiler/frontend/ir/modifiers.h | 23 ++++--- .../impl/floating_point_conversion_integer.cpp | 19 ++++-- .../ir_opt/collect_shader_info_pass.cpp | 71 ++++++++++++++++++++-- .../global_memory_to_storage_buffer_pass.cpp | 1 - src/shader_recompiler/main.cpp | 13 +++- src/shader_recompiler/profile.h | 9 ++- src/shader_recompiler/recompiler.cpp | 5 +- src/shader_recompiler/recompiler.h | 4 +- src/shader_recompiler/shader_info.h | 7 ++- .../renderer_vulkan/vk_compute_pipeline.cpp | 7 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 15 ++++- src/video_core/vulkan_common/vulkan_device.cpp | 26 +++++--- src/video_core/vulkan_common/vulkan_device.h | 33 +++++----- src/video_core/vulkan_common/vulkan_wrapper.cpp | 2 - 20 files changed, 260 insertions(+), 93 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6047f3ebe..fbd4ec6dc 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -32,6 +32,7 @@ add_library(shader_recompiler STATIC frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp frontend/ir/microinstruction.h + frontend/ir/modifiers.h frontend/ir/opcodes.cpp frontend/ir/opcodes.h frontend/ir/opcodes.inc @@ -94,9 +95,7 @@ add_library(shader_recompiler STATIC shader_info.h ) -target_include_directories(shader_recompiler PRIVATE sirit) -target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) -target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit) +target_link_libraries(shader_recompiler PUBLIC fmt::fmt sirit) add_executable(shader_util main.cpp) target_link_libraries(shader_util PRIVATE shader_recompiler) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 4ce07c281..2519e446a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -14,8 +14,6 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" -#pragma optimize("", off) - namespace Shader::Backend::SPIRV { namespace { template @@ -113,9 +111,61 @@ Id TypeId(const EmitContext& ctx, IR::Type type) { throw NotImplementedException("Phi node type {}", type); } } + +void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, + Id main_func) { + if (!profile.support_float_controls) { + return; + } + const Info& info{program.info}; + if (!info.uses_fp32_denorms_flush && !info.uses_fp32_denorms_preserve && + !info.uses_fp16_denorms_flush && !info.uses_fp16_denorms_preserve) { + return; + } + ctx.AddExtension("SPV_KHR_float_controls"); + + if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { + // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); + } else if (info.uses_fp32_denorms_flush) { + if (profile.support_fp32_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); + } else { + // Drivers will most likely flush denorms by default, no need to warn + } + } else if (info.uses_fp32_denorms_preserve) { + if (profile.support_fp32_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); + } else { + // LOG_WARNING(HW_GPU, "Fp32 denorm preserve used in shader without host support"); + } + } + if (!profile.support_separate_denorm_behavior) { + // No separate denorm behavior + return; + } + if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { + // LOG_ERROR(HW_GPU, "Fp16 denorm flush and preserve on the same shader"); + } else if (info.uses_fp16_denorms_flush) { + if (profile.support_fp16_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + } else { + // Same as fp32, no need to warn as most drivers will flush by default + } + } else if (info.uses_fp32_denorms_preserve) { + if (profile.support_fp16_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + } else { + // LOG_WARNING(HW_GPU, "Fp16 denorm preserve used in shader without host support"); + } + } +} } // Anonymous namespace -std::vector EmitSPIRV(Environment& env, IR::Program& program) { +std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { EmitContext ctx{program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) @@ -131,10 +181,11 @@ std::vector EmitSPIRV(Environment& env, IR::Program& program) { ctx.OpFunctionEnd(); } boost::container::small_vector interfaces; - if (program.info.uses_workgroup_id) { + const Info& info{program.info}; + if (info.uses_workgroup_id) { interfaces.push_back(ctx.workgroup_id); } - if (program.info.uses_local_invocation_id) { + if (info.uses_local_invocation_id) { interfaces.push_back(ctx.local_invocation_id); } const std::span interfaces_span(interfaces.data(), interfaces.size()); @@ -144,6 +195,8 @@ std::vector EmitSPIRV(Environment& env, IR::Program& program) { ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); + SetupDenormControl(profile, program, ctx, func); + return ctx.Assemble(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 2b59c0b72..de624a151 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -11,10 +11,12 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector EmitSPIRV(Environment& env, IR::Program& program); +[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, Environment& env, + IR::Program& program); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 9ef180531..c9687de37 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -13,7 +13,10 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { ctx.Decorate(op, spv::Decoration::NoContraction); } switch (flags.rounding) { + case IR::FpRounding::DontCare: + break; case IR::FpRounding::RN: + ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE); break; case IR::FpRounding::RM: ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); @@ -25,9 +28,6 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); break; } - if (flags.fmz_mode != IR::FmzMode::FTZ) { - throw NotImplementedException("Denorm management not implemented"); - } return op; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 559ab9cca..8f120a2f6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -558,53 +558,53 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { } } -F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { +F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPRoundEven16, value); + return Inst(Opcode::FPRoundEven16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPRoundEven32, value); + return Inst(Opcode::FPRoundEven32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPRoundEven64, value); + return Inst(Opcode::FPRoundEven64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { +F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPFloor16, value); + return Inst(Opcode::FPFloor16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPFloor32, value); + return Inst(Opcode::FPFloor32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPFloor64, value); + return Inst(Opcode::FPFloor64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { +F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPCeil16, value); + return Inst(Opcode::FPCeil16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPCeil32, value); + return Inst(Opcode::FPCeil32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPCeil64, value); + return Inst(Opcode::FPCeil64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { +F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPTrunc16, value); + return Inst(Opcode::FPTrunc16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPTrunc32, value); + return Inst(Opcode::FPTrunc32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPTrunc64, value); + return Inst(Opcode::FPTrunc64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 24b012a39..959f4f9da 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -129,10 +129,10 @@ public: [[nodiscard]] F32 FPSinNotReduced(const F32& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index c288eede0..44652eae7 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -4,25 +4,30 @@ #pragma once +#include "common/common_types.h" + namespace Shader::IR { enum class FmzMode : u8 { - None, // Denorms are not flushed, NAN is propagated (nouveau) - FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) - FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + DontCare, // Not specified for this instruction + FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) + FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + None, // Denorms are not flushed, NAN is propagated (nouveau) }; enum class FpRounding : u8 { - RN, // Round to nearest even, - RM, // Round towards negative infinity - RP, // Round towards positive infinity - RZ, // Round towards zero + DontCare, // Not specified for this instruction + RN, // Round to nearest even, + RM, // Round towards negative infinity + RP, // Round towards positive infinity + RZ, // Round towards zero }; struct FpControl { bool no_contraction{false}; - FpRounding rounding{FpRounding::RN}; - FmzMode fmz_mode{FmzMode::FTZ}; + FpRounding rounding{FpRounding::DontCare}; + FmzMode fmz_mode{FmzMode::DontCare}; }; static_assert(sizeof(FpControl) <= sizeof(u32)); + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index ae2d37405..4d82a0009 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -81,17 +81,28 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; + const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && + f2i.dest_format != DestFormat::I64}; + IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; + if (denorm_cares) { + fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; + } + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fmz_mode}, + }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { switch (f2i.rounding) { case Rounding::Round: - return v.ir.FPRoundEven(op_a); + return v.ir.FPRoundEven(op_a, fp_control); case Rounding::Floor: - return v.ir.FPFloor(op_a); + return v.ir.FPFloor(op_a, fp_control); case Rounding::Ceil: - return v.ir.FPCeil(op_a); + return v.ir.FPCeil(op_a, fp_control); case Rounding::Trunc: - return v.ir.FPTrunc(op_a); + return v.ir.FPTrunc(op_a, fp_control); default: throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f7f102f53..6662ef4cd 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -2,23 +2,28 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { namespace { -void AddConstantBufferDescriptor(Info& info, u32 index) { - auto& descriptor{info.constant_buffers.at(index)}; - if (descriptor) { +void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { + if (count != 1) { + throw NotImplementedException("Constant buffer descriptor indexing"); + } + if ((info.constant_buffer_mask & (1U << index)) != 0) { return; } - descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{ + info.constant_buffer_mask |= 1U << index; + info.constant_buffer_descriptors.push_back({ .index{index}, .count{1}, }); } -void Visit(Info& info, IR::Inst& inst) { +void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::WorkgroupId: info.uses_workgroup_id = true; @@ -72,7 +77,7 @@ void Visit(Info& info, IR::Inst& inst) { break; case IR::Opcode::GetCbuf: if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { - AddConstantBufferDescriptor(info, index.U32()); + AddConstantBufferDescriptor(info, index.U32(), 1); } else { throw NotImplementedException("Constant buffer with non-immediate index"); } @@ -81,6 +86,60 @@ void Visit(Info& info, IR::Inst& inst) { break; } } + +void VisitFpModifiers(Info& info, IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::FPAdd16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPTrunc16: { + const auto control{inst.Flags()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp16_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp16_denorms_preserve = true; + break; + } + break; + } + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + case IR::Opcode::FPTrunc32: { + const auto control{inst.Flags()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp32_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp32_denorms_preserve = true; + break; + } + break; + } + default: + break; + } +} + +void Visit(Info& info, IR::Inst& inst) { + VisitUsages(info, inst); + VisitFpModifiers(info, inst); +} } // Anonymous namespace void CollectShaderInfoPass(IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index bf230a850..03bd547b7 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -351,7 +351,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_offset{storage_buffer.offset}, .count{1}, }); - info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); ++storage_index; } for (const StorageInst& storage_inst : to_replace) { diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index abd44e323..72565f477 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -60,6 +60,17 @@ void RunDatabase() { fmt::print(stdout, "{} ms", duration_cast(t - t0).count() / double(N)); } +static constexpr Profile PROFILE{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = true, + .support_separate_rounding_mode = true, + .support_fp16_denorm_preserve = true, + .support_fp32_denorm_preserve = true, + .support_fp16_denorm_flush = true, + .support_fp32_denorm_flush = true, +}; + int main() { // RunDatabase(); @@ -76,7 +87,7 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - const std::vector spirv{Backend::SPIRV::EmitSPIRV(env, program)}; + const std::vector spirv{Backend::SPIRV::EmitSPIRV(PROFILE, env, program)}; std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); std::fclose(file); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c96d783b7..9881bebab 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -7,7 +7,14 @@ namespace Shader { struct Profile { - bool unified_descriptor_binding; + bool unified_descriptor_binding{}; + bool support_float_controls{}; + bool support_separate_denorm_behavior{}; + bool support_separate_rounding_mode{}; + bool support_fp16_denorm_preserve{}; + bool support_fp32_denorm_preserve{}; + bool support_fp16_denorm_flush{}; + bool support_fp32_denorm_flush{}; }; } // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index b25081e39..527e19c27 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -14,14 +14,15 @@ namespace Shader { -std::pair> RecompileSPIRV(Environment& env, u32 start_address) { +std::pair> RecompileSPIRV(const Profile& profile, Environment& env, + u32 start_address) { ObjectPool flow_block_pool; ObjectPool inst_pool; ObjectPool block_pool; Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; - return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; + return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)}; } } // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 4cb973878..2529463ae 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -9,10 +9,12 @@ #include "common/common_types.h" #include "shader_recompiler/environment.h" +#include "shader_recompiler/profile.h" #include "shader_recompiler/shader_info.h" namespace Shader { -[[nodiscard]] std::pair> RecompileSPIRV(Environment& env, u32 start_address); +[[nodiscard]] std::pair> RecompileSPIRV(const Profile& profile, + Environment& env, u32 start_address); } // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f49a79368..8766bf13e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -31,14 +31,15 @@ struct Info { bool uses_local_invocation_id{}; bool uses_fp16{}; bool uses_fp64{}; + bool uses_fp16_denorms_flush{}; + bool uses_fp16_denorms_preserve{}; + bool uses_fp32_denorms_flush{}; + bool uses_fp32_denorms_preserve{}; u32 constant_buffer_mask{}; - std::array constant_buffers{}; boost::container::static_vector constant_buffer_descriptors; - - std::array storage_buffers{}; boost::container::static_vector storage_buffers_descriptors; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 588ce6139..a658a3276 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -131,12 +131,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip })} {} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { - u32 enabled_uniforms{}; - for (const auto& desc : info.constant_buffer_descriptors) { - enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; - } - buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); - + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c2a41a360..49ff911d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -177,7 +177,20 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + const auto& float_control{device.FloatControlProperties()}; + const Shader::Profile profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + }; + const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 85f903125..4887d6fd9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -43,6 +43,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -200,6 +201,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); + SetupProperties(); const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(surface != nullptr); @@ -426,8 +428,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); - - use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); } Device::~Device() = default; @@ -600,7 +600,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - VkPhysicalDeviceFeatures2 features2{}; + VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; features2.pNext = &robustness2; @@ -684,7 +684,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { true); } } - VkPhysicalDeviceFeatures2KHR features; + VkPhysicalDeviceFeatures2KHR features{}; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; VkPhysicalDeviceProperties2KHR physical_properties; @@ -806,11 +806,21 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { } void Device::SetupFeatures() { - const auto supported_features{physical.GetFeatures()}; - is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; + const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); + is_optimal_astc_supported = IsOptimalAstcSupported(features); +} + +void Device::SetupProperties() { + float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2KHR properties2{}; + properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + properties2.pNext = &float_controls; + + physical.GetProperties2KHR(properties2); } void Device::CollectTelemetryParameters() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 96c0f8c60..82bccc8f0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -128,6 +128,11 @@ public: return properties.limits.maxComputeSharedMemorySize; } + /// Returns float control properties of the device. + const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { + return float_controls; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -223,11 +228,6 @@ public: return reported_extensions; } - /// Returns true if the setting for async shader compilation is enabled. - bool UseAsynchronousShaders() const { - return use_asynchronous_shaders; - } - u64 GetDeviceLocalMemory() const { return device_access_memory; } @@ -245,6 +245,9 @@ private: /// Sets up device features. void SetupFeatures(); + /// Sets up device properties. + void SetupProperties(); + /// Collects telemetry information from the device. void CollectTelemetryParameters(); @@ -267,14 +270,15 @@ private: bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan onstance version. u32 graphics_family{}; ///< Main graphics queue family index. u32 present_family{}; ///< Main present queue family index. VkDriverIdKHR driver_id{}; ///< Driver ID. @@ -301,9 +305,6 @@ private: bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - // Asynchronous Graphics Pipeline setting - bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline - // Telemetry parameters std::string vendor_name; ///< Device's driver name. std::vector reported_extensions; ///< Reported Vulkan extensions. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2aa0ffbe6..33fb74bfb 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -311,8 +311,6 @@ const char* ToString(VkResult result) noexcept { return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; case VkResult::VK_ERROR_UNKNOWN: return "VK_ERROR_UNKNOWN"; - case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: - return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; case VkResult::VK_THREAD_IDLE_KHR: return "VK_THREAD_IDLE_KHR"; case VkResult::VK_THREAD_DONE_KHR: -- cgit v1.2.3 From 260743f371236f7c57b01334b1c3474b15a47c39 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 19:28:31 -0300 Subject: shader: Add partial rasterizer integration --- src/shader_recompiler/CMakeLists.txt | 4 +- .../backend/spirv/emit_context.cpp | 64 ++- src/shader_recompiler/backend/spirv/emit_context.h | 18 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 44 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 18 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 55 ++- .../backend/spirv/emit_spirv_control_flow.cpp | 23 +- src/shader_recompiler/environment.h | 14 + src/shader_recompiler/frontend/ir/attribute.cpp | 2 +- src/shader_recompiler/frontend/ir/attribute.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 14 + src/shader_recompiler/frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 3 + src/shader_recompiler/frontend/ir/opcodes.inc | 11 +- src/shader_recompiler/frontend/ir/program.h | 2 + src/shader_recompiler/frontend/ir/reg.h | 4 +- .../frontend/maxwell/control_flow.cpp | 31 +- .../frontend/maxwell/control_flow.h | 3 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../frontend/maxwell/structured_control_flow.cpp | 18 + .../frontend/maxwell/translate/impl/exit.cpp | 15 - .../maxwell/translate/impl/exit_program.cpp | 43 ++ .../frontend/maxwell/translate/impl/impl.h | 4 +- .../translate/impl/load_store_attribute.cpp | 86 +++- .../maxwell/translate/impl/not_implemented.cpp | 16 +- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../ir_opt/collect_shader_info_pass.cpp | 60 ++- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 2 +- src/shader_recompiler/program_header.h | 143 +++++++ src/shader_recompiler/recompiler.cpp | 28 -- src/shader_recompiler/recompiler.h | 20 - src/shader_recompiler/shader_info.h | 10 + src/shader_recompiler/stage.h | 19 + src/video_core/CMakeLists.txt | 6 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 4 + .../renderer_vulkan/fixed_pipeline_state.h | 9 +- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 24 ++ src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 + src/video_core/renderer_vulkan/pipeline_helper.h | 162 ++++++++ .../renderer_vulkan/vk_compute_pipeline.cpp | 209 ++-------- .../renderer_vulkan/vk_compute_pipeline.h | 3 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 445 +++++++++++++++++++++ .../renderer_vulkan/vk_graphics_pipeline.h | 66 +++ src/video_core/renderer_vulkan/vk_pipeline.h | 36 -- .../renderer_vulkan/vk_pipeline_cache.cpp | 346 ++++++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 82 +++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 47 ++- src/video_core/renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_render_pass_cache.cpp | 100 +++++ .../renderer_vulkan/vk_render_pass_cache.h | 53 +++ .../renderer_vulkan/vk_texture_cache.cpp | 68 +--- src/video_core/renderer_vulkan/vk_texture_cache.h | 29 +- src/video_core/vulkan_common/vulkan_device.cpp | 15 + 54 files changed, 1927 insertions(+), 566 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp create mode 100644 src/shader_recompiler/program_header.h delete mode 100644 src/shader_recompiler/recompiler.cpp delete mode 100644 src/shader_recompiler/recompiler.h create mode 100644 src/shader_recompiler/stage.h create mode 100644 src/video_core/renderer_vulkan/pipeline_helper.h create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h delete mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.h (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b870e9937..31c394106 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/common_funcs.h frontend/maxwell/translate/impl/condition_code_set.cpp frontend/maxwell/translate/impl/double_add.cpp + frontend/maxwell/translate/impl/exit_program.cpp frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -121,9 +122,8 @@ add_library(shader_recompiler STATIC ir_opt/texture_pass.cpp ir_opt/verification_pass.cpp object_pool.h + program_header.h profile.h - recompiler.cpp - recompiler.h shader_info.h ) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 204389d74..6c79b611b 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -62,18 +62,15 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(const Profile& profile_, IR::Program& program) +EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) : Sirit::Module(0x00010000), profile{profile_} { AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); - DefineSpecialVariables(program.info); - - u32 binding{}; + DefineInterfaces(program.info, program.stage); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextures(program.info, binding); - DefineLabels(program); } @@ -96,6 +93,8 @@ Id EmitContext::Def(const IR::Value& value) { return Constant(F32[1], value.F32()); case IR::Type::F64: return Constant(F64[1], value.F64()); + case IR::Type::Label: + return value.Label()->Definition(); default: throw NotImplementedException("Immediate type {}", value.Type()); } @@ -109,6 +108,9 @@ void EmitContext::DefineCommonTypes(const Info& info) { F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); + if (info.uses_int8) { AddCapability(spv::Capability::Int8); U8 = Name(TypeInt(8, false), "u8"); @@ -139,15 +141,20 @@ void EmitContext::DefineCommonConstants() { u32_zero_value = Constant(U32[1], 0U); } -void EmitContext::DefineSpecialVariables(const Info& info) { - const auto define{[this](Id type, spv::BuiltIn builtin, spv::StorageClass storage_class) { - const Id pointer_type{TypePointer(storage_class, type)}; - const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::Input)}; - Decorate(id, spv::Decoration::BuiltIn, builtin); - return id; - }}; +void EmitContext::DefineInterfaces(const Info& info, Stage stage) { + const auto define{ + [this](Id type, std::optional builtin, spv::StorageClass storage_class) { + const Id pointer_type{TypePointer(storage_class, type)}; + const Id id{AddGlobalVariable(pointer_type, storage_class)}; + if (builtin) { + Decorate(id, spv::Decoration::BuiltIn, *builtin); + } + interfaces.push_back(id); + return id; + }}; using namespace std::placeholders; const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)}; + const auto define_output{std::bind(define, _1, _2, spv::StorageClass::Output)}; if (info.uses_workgroup_id) { workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId); @@ -155,6 +162,39 @@ void EmitContext::DefineSpecialVariables(const Info& info) { if (info.uses_local_invocation_id) { local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId); } + if (info.loads_position) { + const bool is_fragment{stage != Stage::Fragment}; + const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; + input_position = define_input(F32[4], built_in); + } + for (size_t i = 0; i < info.loads_generics.size(); ++i) { + if (info.loads_generics[i]) { + // FIXME: Declare size from input + input_generics[i] = define_input(F32[4], std::nullopt); + Decorate(input_generics[i], spv::Decoration::Location, static_cast(i)); + Name(input_generics[i], fmt::format("in_attr{}", i)); + } + } + if (info.stores_position) { + output_position = define_output(F32[4], spv::BuiltIn::Position); + } + for (size_t i = 0; i < info.stores_generics.size(); ++i) { + if (info.stores_generics[i]) { + output_generics[i] = define_output(F32[4], std::nullopt); + Decorate(output_generics[i], spv::Decoration::Location, static_cast(i)); + Name(output_generics[i], fmt::format("out_attr{}", i)); + } + } + if (stage == Stage::Fragment) { + for (size_t i = 0; i < 8; ++i) { + if (!info.stores_frag_color[i]) { + continue; + } + frag_color[i] = define_output(F32[4], std::nullopt); + Decorate(frag_color[i], spv::Decoration::Location, static_cast(i)); + Name(frag_color[i], fmt::format("frag_color{}", i)); + } + } } void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 35eca258a..2d7961ac3 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -46,7 +46,7 @@ struct UniformDefinitions { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, IR::Program& program); + explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); @@ -71,6 +71,9 @@ public: UniformDefinitions uniform_types; + Id input_f32{}; + Id output_f32{}; + Id storage_u32{}; std::array cbufs{}; @@ -80,10 +83,21 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id input_position{}; + std::array input_generics{}; + + Id output_position{}; + std::array output_generics{}; + + std::array frag_color{}; + Id frag_depth {}; + + std::vector interfaces; + private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); - void DefineSpecialVariables(const Info& info); + void DefineInterfaces(const Info& info, Stage stage); void DefineConstantBuffers(const Info& info, u32& binding); void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, Id type, char type_char, u32 element_size); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 50c0f7243..b8978b94a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -54,6 +54,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.U32(); } else if constexpr (std::is_same_v) { return arg.Label(); + } else if constexpr (std::is_same_v) { + return arg.Attribute(); } } @@ -197,8 +199,9 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { - EmitContext ctx{profile, program}; +std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program, + u32& binding) { + EmitContext ctx{profile, program, binding}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; const Id func{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; for (IR::Block* const block : program.blocks) { @@ -208,28 +211,41 @@ std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program } } ctx.OpFunctionEnd(); - boost::container::small_vector interfaces; - const Info& info{program.info}; - if (info.uses_workgroup_id) { - interfaces.push_back(ctx.workgroup_id); + + const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); + spv::ExecutionModel execution_model{}; + switch (env.ShaderStage()) { + case Shader::Stage::Compute: { + const std::array workgroup_size{env.WorkgroupSize()}; + execution_model = spv::ExecutionModel::GLCompute; + ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], + workgroup_size[1], workgroup_size[2]); + break; } - if (info.uses_local_invocation_id) { - interfaces.push_back(ctx.local_invocation_id); + case Shader::Stage::VertexB: + execution_model = spv::ExecutionModel::Vertex; + break; + case Shader::Stage::Fragment: + execution_model = spv::ExecutionModel::Fragment; + ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); + break; + default: + throw NotImplementedException("Stage {}", env.ShaderStage()); } - const std::span interfaces_span(interfaces.data(), interfaces.size()); - ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span); - - const std::array workgroup_size{env.WorkgroupSize()}; - ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], - workgroup_size[2]); + ctx.AddEntryPoint(execution_model, func, "main", interfaces); SetupDenormControl(profile, program, ctx, func); + const Info& info{program.info}; if (info.uses_sampled_1d) { ctx.AddCapability(spv::Capability::Sampled1D); } if (info.uses_sparse_residency) { ctx.AddCapability(spv::Capability::SparseResidency); } + if (info.uses_demote_to_helper_invocation) { + ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); + ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 89566c83d..ae121f534 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -16,18 +16,18 @@ namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, Environment& env, - IR::Program& program); + IR::Program& program, u32& binding); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranch(EmitContext& ctx, IR::Block* label); -void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label); -void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); -void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); +void EmitBranch(EmitContext& ctx, Id label); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); void EmitGetPred(EmitContext& ctx); @@ -41,10 +41,12 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetAttribute(EmitContext& ctx); -void EmitSetAttribute(EmitContext& ctx); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value); void EmitGetAttributeIndexed(EmitContext& ctx); void EmitSetAttributeIndexed(EmitContext& ctx); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitGetZFlag(EmitContext& ctx); void EmitGetSFlag(EmitContext& ctx); void EmitGetCFlag(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 125b58cf7..02d115740 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -5,6 +5,43 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { +namespace { +Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) { + const u32 element{static_cast(attr) % 4}; + const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + return ctx.OpAccessChain(ctx.input_f32, ctx.input_generics.at(index), element_id()); + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id()); + default: + throw NotImplementedException("Read attribute {}", attr); + } +} + +Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { + const u32 element{static_cast(attr) % 4}; + const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id()); + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id()); + default: + throw NotImplementedException("Read attribute {}", attr); + } +} +} // Anonymous namespace void EmitGetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); @@ -87,12 +124,12 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset); } -void EmitGetAttribute(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { + return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr)); } -void EmitSetAttribute(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { + ctx.OpStore(OutputAttrPointer(ctx, attr), value); } void EmitGetAttributeIndexed(EmitContext&) { @@ -103,6 +140,16 @@ void EmitSetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { + const Id component_id{ctx.Constant(ctx.U32[1], component)}; + const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; + ctx.OpStore(pointer, value); +} + +void EmitSetFragDepth(EmitContext& ctx, Id value) { + ctx.OpStore(ctx.frag_depth, value); +} + void EmitGetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 48755b827..6b81f0169 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -6,26 +6,29 @@ namespace Shader::Backend::SPIRV { -void EmitBranch(EmitContext& ctx, IR::Block* label) { - ctx.OpBranch(label->Definition()); +void EmitBranch(EmitContext& ctx, Id label) { + ctx.OpBranch(label); } -void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label) { - ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label) { + ctx.OpBranchConditional(condition, true_label, false_label); } -void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { - ctx.OpLoopMerge(merge_label->Definition(), continue_label->Definition(), - spv::LoopControlMask::MaskNone); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label) { + ctx.OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); } -void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { - ctx.OpSelectionMerge(merge_label->Definition(), spv::SelectionControlMask::MaskNone); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label) { + ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); } void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) { + ctx.OpDemoteToHelperInvocationEXT(); + ctx.OpBranch(continue_label); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0fcb68050..1fcaa56dd 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -3,6 +3,8 @@ #include #include "common/common_types.h" +#include "shader_recompiler/stage.h" +#include "shader_recompiler/program_header.h" namespace Shader { @@ -15,6 +17,18 @@ public: [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; [[nodiscard]] virtual std::array WorkgroupSize() = 0; + + [[nodiscard]] const ProgramHeader& SPH() const noexcept { + return sph; + } + + [[nodiscard]] Stage ShaderStage() const noexcept { + return stage; + } + +protected: + ProgramHeader sph{}; + Stage stage{}; }; } // namespace Shader diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 2fb7d576f..4811242ea 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -13,7 +13,7 @@ bool IsGeneric(Attribute attribute) noexcept { return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X; } -int GenericAttributeIndex(Attribute attribute) { +u32 GenericAttributeIndex(Attribute attribute) { if (!IsGeneric(attribute)) { throw InvalidArgument("Attribute is not generic {}", attribute); } diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index bb2cad6af..34ec7e0cd 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -224,7 +224,7 @@ enum class Attribute : u64 { [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; -[[nodiscard]] int GenericAttributeIndex(Attribute attribute); +[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); [[nodiscard]] std::string NameOf(Attribute attribute); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 958282160..672836c0b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -82,6 +82,12 @@ void IREmitter::Return() { Inst(Opcode::Return); } +void IREmitter::DemoteToHelperInvocation(Block* continue_label) { + block->SetBranch(continue_label); + continue_label->AddImmediatePredecessor(block); + Inst(Opcode::DemoteToHelperInvocation, continue_label); +} + U32 IREmitter::GetReg(IR::Reg reg) { return Inst(Opcode::GetRegister, reg); } @@ -248,6 +254,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { Inst(Opcode::SetAttribute, attribute, value); } +void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { + Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); +} + +void IREmitter::SetFragDepth(const F32& value) { + Inst(Opcode::SetFragDepth, value); +} + U32 IREmitter::WorkgroupIdX() { return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 05263fe8b..72af5db37 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -36,6 +36,7 @@ public: void LoopMerge(Block* merge_block, Block* continue_target); void SelectionMerge(Block* merge_block); void Return(); + void DemoteToHelperInvocation(Block* continue_label); [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); @@ -67,6 +68,9 @@ public: [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); + void SetFragColor(u32 index, u32 component, const F32& value); + void SetFragDepth(const F32& value); + [[nodiscard]] U32 WorkgroupIdX(); [[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdZ(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 5946105d2..21b7d8a9f 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -55,8 +55,11 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case Opcode::DemoteToHelperInvocation: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: + case Opcode::SetFragColor: + case Opcode::SetFragDepth: case Opcode::WriteGlobalU8: case Opcode::WriteGlobalS8: case Opcode::WriteGlobalU16: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9052a4903..593faca52 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(DemoteToHelperInvocation, Void, Label, ) // Context getters/setters OPCODE(GetRegister, U32, Reg, ) @@ -28,10 +29,12 @@ OPCODE(GetCbufS16, U32, U32, OPCODE(GetCbufU32, U32, U32, U32, ) OPCODE(GetCbufF32, F32, U32, U32, ) OPCODE(GetCbufU64, U64, U32, U32, ) -OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, U32, ) -OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, U32, ) +OPCODE(GetAttribute, F32, Attribute, ) +OPCODE(SetAttribute, Void, Attribute, F32, ) +OPCODE(GetAttributeIndexed, F32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, F32, ) +OPCODE(SetFragColor, Void, U32, U32, F32, ) +OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index bce8b19b3..733513c8b 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -10,6 +10,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/shader_info.h" +#include "shader_recompiler/stage.h" namespace Shader::IR { @@ -17,6 +18,7 @@ struct Program { BlockList blocks; BlockList post_order_blocks; Info info; + Stage stage{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 8fea05f7b..3845ec5fb 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -293,12 +293,12 @@ constexpr size_t NUM_REGS = 256; return reg + (-num); } -[[nodiscard]] constexpr Reg operator++(Reg& reg) { +constexpr Reg operator++(Reg& reg) { reg = reg + 1; return reg; } -[[nodiscard]] constexpr Reg operator++(Reg& reg, int) { +constexpr Reg operator++(Reg& reg, int) { const Reg copy{reg}; reg = reg + 1; return copy; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 715c0e92d..4f6707fae 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -104,6 +104,7 @@ bool HasFlowTest(Opcode opcode) { case Opcode::EXIT: case Opcode::JMP: case Opcode::JMX: + case Opcode::KIL: case Opcode::BRK: case Opcode::CONT: case Opcode::LONGJMP: @@ -287,6 +288,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati block->end = pc; return AnalysisState::Branch; } + case Opcode::KIL: { + const Predicate pred{inst.Pred()}; + const auto ir_pred{static_cast(pred.index)}; + const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond); + return AnalysisState::Branch; + } case Opcode::PBK: case Opcode::PCNT: case Opcode::PEXIT: @@ -324,13 +332,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } const IR::Condition cond{static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true); + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); return AnalysisState::Branch; } void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, - EndClass insn_end_class, IR::Condition cond, - bool visit_conditional_inst) { + EndClass insn_end_class, IR::Condition cond) { if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later @@ -356,14 +363,16 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = visit_conditional_inst ? (pc + 1) : pc; + conditional_block->end = pc + 1; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; // Branch to the next instruction from the virtual block block->branch_false = endif_block; - // And branch to it from the conditional instruction if it is a branch - if (insn_end_class == EndClass::Branch) { + // And branch to it from the conditional instruction if it is a branch or a kill instruction + // Kill instructions are considered a branch because they demote to a helper invocation and + // execution may continue. + if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) { conditional_block->cond = IR::Condition{true}; conditional_block->branch_true = endif_block; conditional_block->branch_false = nullptr; @@ -415,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false); + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } if (const std::optional exit_pc{block->stack.Peek(Token::PEXIT)}) { @@ -425,7 +434,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati block->branch_false = nullptr; return AnalysisState::Branch; } - block->end = pc; + block->end = pc + 1; block->end_class = EndClass::Exit; return AnalysisState::Branch; } @@ -505,6 +514,12 @@ std::string CFG::Dot() const { node_uid); ++node_uid; break; + case EndClass::Kill: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; } } if (function.entrypoint == 8) { diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index fe74f210f..22f134194 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -29,6 +29,7 @@ enum class EndClass { Call, Exit, Return, + Kill, }; enum class Token { @@ -130,7 +131,7 @@ private: AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, - IR::Condition cond, bool visit_conditional_inst); + IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8bfa64326..0074eb89b 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -32,6 +32,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPoolid, DumpExpr(stmt->op)); break; @@ -424,6 +430,9 @@ private: gotos.push_back(root.insert(ip, *goto_stmt)); break; } + case Flow::EndClass::Kill: + root.insert(ip, *pool.Create(Kill{})); + break; } } } @@ -729,6 +738,15 @@ private: current_block = nullptr; break; } + case StatementType::Kill: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block); + current_block = nullptr; + break; + } default: throw NotImplementedException("Statement type {}", stmt.type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp deleted file mode 100644 index e98bbd0d1..000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { - -void TranslatorVisitor::EXIT(u64) { - ir.Exit(); -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 000000000..ea9b33da9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -0,0 +1,43 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ExitFragment(TranslatorVisitor& v) { + const ProgramHeader sph{v.env.SPH()}; + IR::Reg src_reg{IR::Reg::R0}; + for (u32 render_target = 0; render_target < 8; ++render_target) { + const std::array mask{sph.ps.EnabledOutputComponents(render_target)}; + for (u32 component = 0; component < 4; ++component) { + if (!mask[component]) { + continue; + } + v.ir.SetFragColor(render_target, component, v.F(src_reg)); + ++src_reg; + } + } + if (sph.ps.omap.sample_mask != 0) { + throw NotImplementedException("Sample mask"); + } + if (sph.ps.omap.depth != 0) { + throw NotImplementedException("Fragment depth"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::EXIT() { + switch (env.ShaderStage()) { + case Stage::Fragment: + ExitFragment(*this); + break; + default: + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index e3e298c3b..ed81d9c36 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -108,7 +108,7 @@ public: void DSETP_reg(u64 insn); void DSETP_cbuf(u64 insn); void DSETP_imm(u64 insn); - void EXIT(u64 insn); + void EXIT(); void F2F_reg(u64 insn); void F2F_cbuf(u64 insn); void F2F_imm(u64 insn); @@ -220,7 +220,7 @@ public: void JCAL(u64 insn); void JMP(u64 insn); void JMX(u64 insn); - void KIL(u64 insn); + void KIL(); void LD(u64 insn); void LDC(u64 insn); void LDG(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index ad97786d4..2922145ee 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -11,6 +11,13 @@ namespace Shader::Maxwell { namespace { +enum class Size : u64 { + B32, + B64, + B96, + B128, +}; + enum class InterpolationMode : u64 { Pass, Multiply, @@ -23,8 +30,85 @@ enum class SampleMode : u64 { Centroid, Offset, }; + +int NumElements(Size size) { + switch (size) { + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B96: + return 3; + case Size::B128: + return 4; + } + throw InvalidArgument("Invalid size {}", size); +} } // Anonymous namespace +void TranslatorVisitor::ALD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<39, 8, IR::Reg> stream_reg; + BitField<32, 1, u64> o; + BitField<31, 1, u64> patch; + BitField<47, 2, Size> size; + } const ald{insn}; + + if (ald.o != 0) { + throw NotImplementedException("O"); + } + if (ald.patch != 0) { + throw NotImplementedException("P"); + } + if (ald.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed"); + } + const u64 offset{ald.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ald.size)}; + for (int element = 0; element < num_elements; ++element) { + F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); + } +} + +void TranslatorVisitor::AST(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<31, 1, u64> patch; + BitField<39, 8, IR::Reg> stream_reg; + BitField<47, 2, Size> size; + } const ast{insn}; + + if (ast.patch != 0) { + throw NotImplementedException("P"); + } + if (ast.stream_reg != IR::Reg::RZ) { + throw NotImplementedException("Stream store"); + } + if (ast.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed store"); + } + const u64 offset{ast.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ast.size)}; + for (int element = 0; element < num_elements; ++element) { + ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); + } +} + void TranslatorVisitor::IPA(u64 insn) { // IPA is the instruction used to read varyings from a fragment shader. // gl_FragCoord is mapped to the gl_Position attribute. @@ -51,7 +135,7 @@ void TranslatorVisitor::IPA(u64 insn) { // } const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; if (is_indexed) { - throw NotImplementedException("IPA.IDX"); + throw NotImplementedException("IDX"); } const IR::Attribute attribute{ipa.attribute}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9675cef54..59252bcc5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -17,14 +17,6 @@ void TranslatorVisitor::AL2P(u64) { ThrowNotImplemented(Opcode::AL2P); } -void TranslatorVisitor::ALD(u64) { - ThrowNotImplemented(Opcode::ALD); -} - -void TranslatorVisitor::AST(u64) { - ThrowNotImplemented(Opcode::AST); -} - void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } @@ -153,10 +145,6 @@ void TranslatorVisitor::DSETP_imm(u64) { ThrowNotImplemented(Opcode::DSETP_imm); } -void TranslatorVisitor::EXIT(u64) { - throw LogicError("Visting EXIT instruction"); -} - void TranslatorVisitor::F2F_reg(u64) { ThrowNotImplemented(Opcode::F2F_reg); } @@ -345,8 +333,8 @@ void TranslatorVisitor::JMX(u64) { ThrowNotImplemented(Opcode::JMX); } -void TranslatorVisitor::KIL(u64) { - ThrowNotImplemented(Opcode::KIL); +void TranslatorVisitor::KIL() { + // KIL is a no-op } void TranslatorVisitor::LD(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 98d9f4c64..0fbb87ec4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -215,7 +215,7 @@ void TranslatorVisitor::TEX(u64 insn) { BitField<36, 13, u64> cbuf_offset; } const tex{insn}; - Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset * 4)); } void TranslatorVisitor::TEX_b(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index ac1615b00..54f0df754 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -70,7 +70,7 @@ IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { IR::Value Sample(TranslatorVisitor& v, u64 insn) { const Encoding texs{insn}; - const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset))}; + const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset * 4))}; const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::Reg reg_a{texs.src_reg_a}; const IR::Reg reg_b{texs.src_reg_b}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 708b6b267..fbbe28632 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -17,10 +17,47 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { return; } info.constant_buffer_mask |= 1U << index; - info.constant_buffer_descriptors.push_back({ - .index{index}, - .count{1}, - }); + + auto& cbufs{info.constant_buffer_descriptors}; + cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), + ConstantBufferDescriptor{ + .index{index}, + .count{1}, + }); +} + +void GetAttribute(Info& info, IR::Attribute attribute) { + if (IR::IsGeneric(attribute)) { + info.loads_generics.at(IR::GenericAttributeIndex(attribute)) = true; + return; + } + switch (attribute) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + info.loads_position = true; + break; + default: + throw NotImplementedException("Get attribute {}", attribute); + } +} + +void SetAttribute(Info& info, IR::Attribute attribute) { + if (IR::IsGeneric(attribute)) { + info.stores_generics.at(IR::GenericAttributeIndex(attribute)) = true; + return; + } + switch (attribute) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + info.stores_position = true; + break; + default: + throw NotImplementedException("Set attribute {}", attribute); + } } void VisitUsages(Info& info, IR::Inst& inst) { @@ -162,6 +199,21 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; } switch (inst.Opcode()) { + case IR::Opcode::DemoteToHelperInvocation: + info.uses_demote_to_helper_invocation = true; + break; + case IR::Opcode::GetAttribute: + GetAttribute(info, inst.Arg(0).Attribute()); + break; + case IR::Opcode::SetAttribute: + SetAttribute(info, inst.Arg(0).Attribute()); + break; + case IR::Opcode::SetFragColor: + info.stores_frag_color[inst.Arg(0).U32()] = true; + break; + case IR::Opcode::SetFragDepth: + info.stores_frag_depth = true; + break; case IR::Opcode::WorkgroupId: info.uses_workgroup_id = true; break; diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index d09bcec36..bab7ca186 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -169,7 +169,7 @@ private: const size_t num_args{phi.NumArgs()}; for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { const IR::Value& op{phi.Arg(arg_index)}; - if (op == same || op == IR::Value{&phi}) { + if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { // Unique value or self-reference continue; } diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h new file mode 100644 index 000000000..1544bfa42 --- /dev/null +++ b/src/shader_recompiler/program_header.h @@ -0,0 +1,143 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Shader { + +enum class OutputTopology : u32 { + PointList = 1, + LineStrip = 6, + TriangleStrip = 7, +}; + +enum class PixelImap : u8 { + Unused = 0, + Constant = 1, + Perspective = 2, + ScreenLinear = 3, +}; + +// Documentation in: +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html +struct ProgramHeader { + union { + BitField<0, 5, u32> sph_type; + BitField<5, 5, u32> version; + BitField<10, 4, u32> shader_type; + BitField<14, 1, u32> mrt_enable; + BitField<15, 1, u32> kills_pixels; + BitField<16, 1, u32> does_global_store; + BitField<17, 4, u32> sass_version; + BitField<21, 5, u32> reserved; + BitField<26, 1, u32> does_load_or_store; + BitField<27, 1, u32> does_fp64; + BitField<28, 4, u32> stream_out_mask; + } common0; + + union { + BitField<0, 24, u32> shader_local_memory_low_size; + BitField<24, 8, u32> per_patch_attribute_count; + } common1; + + union { + BitField<0, 24, u32> shader_local_memory_high_size; + BitField<24, 8, u32> threads_per_input_primitive; + } common2; + + union { + BitField<0, 24, u32> shader_local_memory_crs_size; + BitField<24, 4, OutputTopology> output_topology; + BitField<28, 4, u32> reserved; + } common3; + + union { + BitField<0, 12, u32> max_output_vertices; + BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. + BitField<20, 4, u32> reserved; + BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. + } common4; + + union { + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB + INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + union { + BitField<0, 8, u16> clip_distances; + BitField<8, 1, u16> point_sprite_s; + BitField<9, 1, u16> point_sprite_t; + BitField<10, 1, u16> fog_coordinate; + BitField<12, 1, u16> tessellation_eval_point_u; + BitField<13, 1, u16> tessellation_eval_point_v; + BitField<14, 1, u16> instance_id; + BitField<15, 1, u16> vertex_id; + }; + INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved + INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB + INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] + INSERT_PADDING_BYTES_NOINIT(2); // OmapColor + INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC + INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved + } vtg; + + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB + + union { + BitField<0, 2, PixelImap> x; + BitField<2, 2, PixelImap> y; + BitField<4, 2, PixelImap> z; + BitField<6, 2, PixelImap> w; + u8 raw; + } imap_generic_vector[32]; + + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC + INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved + + struct { + u32 target; + union { + BitField<0, 1, u32> sample_mask; + BitField<1, 1, u32> depth; + BitField<2, 30, u32> reserved; + }; + } omap; + + [[nodiscard]] std::array EnabledOutputComponents(u32 rt) const noexcept { + const u32 bits{omap.target >> (rt * 4)}; + return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0}; + } + + [[nodiscard]] std::array GenericInputMap(u32 attribute) const { + const auto& vector{imap_generic_vector[attribute]}; + return {vector.x, vector.y, vector.z, vector.w}; + } + } ps; + + std::array raw; + }; + + [[nodiscard]] u64 LocalMemorySize() const noexcept { + return (common1.shader_local_memory_low_size | + (common2.shader_local_memory_high_size << 24)); + } +}; +static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size"); + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp deleted file mode 100644 index 527e19c27..000000000 --- a/src/shader_recompiler/recompiler.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/common_types.h" -#include "shader_recompiler/backend/spirv/emit_spirv.h" -#include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/object_pool.h" -#include "shader_recompiler/recompiler.h" - -namespace Shader { - -std::pair> RecompileSPIRV(const Profile& profile, Environment& env, - u32 start_address) { - ObjectPool flow_block_pool; - ObjectPool inst_pool; - ObjectPool block_pool; - - Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; - IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; - return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)}; -} - -} // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h deleted file mode 100644 index 2529463ae..000000000 --- a/src/shader_recompiler/recompiler.h +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/common_types.h" -#include "shader_recompiler/environment.h" -#include "shader_recompiler/profile.h" -#include "shader_recompiler/shader_info.h" - -namespace Shader { - -[[nodiscard]] std::pair> RecompileSPIRV(const Profile& profile, - Environment& env, u32 start_address); - -} // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index adc1d9a64..6eff762e2 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -56,6 +56,15 @@ struct Info { bool uses_workgroup_id{}; bool uses_local_invocation_id{}; + + std::array loads_generics{}; + bool loads_position{}; + + std::array stores_frag_color{}; + bool stores_frag_depth{}; + std::array stores_generics{}; + bool stores_position{}; + bool uses_fp16{}; bool uses_fp64{}; bool uses_fp16_denorms_flush{}; @@ -68,6 +77,7 @@ struct Info { bool uses_image_1d{}; bool uses_sampled_1d{}; bool uses_sparse_residency{}; + bool uses_demote_to_helper_invocation{}; IR::Type used_constant_buffer_types{}; diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h new file mode 100644 index 000000000..fc6ce6043 --- /dev/null +++ b/src/shader_recompiler/stage.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +enum class Stage { + Compute, + VertexA, + VertexB, + TessellationControl, + TessellationEval, + Geometry, + Fragment, +}; + +} // namespace Shader diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3323e6916..71b07c194 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -100,6 +100,7 @@ add_library(video_core STATIC renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/pipeline_helper.h renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp @@ -116,15 +117,18 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h + renderer_vulkan/vk_graphics_pipeline.cpp + renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h - renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_query_cache.cpp renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_render_pass_cache.cpp + renderer_vulkan/vk_render_pass_cache.h renderer_vulkan/vk_resource_pool.cpp renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 362278f01..d8f683907 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -72,6 +72,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); + depth_format.Assign(static_cast(regs.zeta.format)); + std::ranges::transform(regs.rt, color_formats.begin(), + [](const auto& rt) { return static_cast(rt.format); }); alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index a0eb83a68..348f1d6ce 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -60,7 +60,7 @@ struct FixedPipelineState { void Refresh(const Maxwell& regs, size_t index); - constexpr std::array Mask() const noexcept { + std::array Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; } @@ -97,11 +97,11 @@ struct FixedPipelineState { BitField<20, 3, u32> type; BitField<23, 6, u32> size; - constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast(type.Value()); } - constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + Maxwell::VertexAttribute::Size Size() const noexcept { return static_cast(size.Value()); } }; @@ -187,7 +187,10 @@ struct FixedPipelineState { u32 raw2; BitField<0, 3, u32> alpha_test_func; BitField<3, 1, u32> early_z; + BitField<4, 1, u32> depth_enabled; + BitField<5, 5, u32> depth_format; }; + std::array color_formats; u32 alpha_test_ref; u32 point_size; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index f088447e9..dc4ff0da2 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -741,4 +741,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; } +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) { + switch (msaa_mode) { + case Tegra::Texture::MsaaMode::Msaa1x1: + return VK_SAMPLE_COUNT_1_BIT; + case Tegra::Texture::MsaaMode::Msaa2x1: + case Tegra::Texture::MsaaMode::Msaa2x1_D3D: + return VK_SAMPLE_COUNT_2_BIT; + case Tegra::Texture::MsaaMode::Msaa2x2: + case Tegra::Texture::MsaaMode::Msaa2x2_VC4: + case Tegra::Texture::MsaaMode::Msaa2x2_VC12: + return VK_SAMPLE_COUNT_4_BIT; + case Tegra::Texture::MsaaMode::Msaa4x2: + case Tegra::Texture::MsaaMode::Msaa4x2_D3D: + case Tegra::Texture::MsaaMode::Msaa4x2_VC8: + case Tegra::Texture::MsaaMode::Msaa4x2_VC24: + return VK_SAMPLE_COUNT_8_BIT; + case Tegra::Texture::MsaaMode::Msaa4x4: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); + return VK_SAMPLE_COUNT_1_BIT; + } +} + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index e3e06ba38..9f78e15b6 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -71,4 +71,6 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode); + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h new file mode 100644 index 000000000..0a59aa659 --- /dev/null +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -0,0 +1,162 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace Vulkan { + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + [[likely]] if (via_header_index) { + image = data; + sampler = data; + } else { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + } + + u32 image; + u32 sampler; +}; + +struct DescriptorLayoutTuple { + vk::DescriptorSetLayout descriptor_set_layout; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; +}; + +class DescriptorLayoutBuilder { +public: + DescriptorLayoutTuple Create(const vk::Device& device) { + DescriptorLayoutTuple result; + if (!bindings.empty()) { + result.descriptor_set_layout = device.CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); + } + result.pipeline_layout = device.CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = result.descriptor_set_layout ? 1U : 0U, + .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); + if (!entries.empty()) { + result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *result.descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *result.pipeline_layout, + .set = 0, + }); + } + return result; + } + + void Add(const Shader::Info& info, VkShaderStageFlags stage) { + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage); + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); + } + for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); + } + } + +private: + void Add(VkDescriptorType type, VkShaderStageFlags stage) { + bindings.push_back({ + .binding = binding, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = stage, + .pImmutableSamplers = nullptr, + }); + entries.push_back(VkDescriptorUpdateTemplateEntryKHR{ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + + boost::container::small_vector bindings; + boost::container::small_vector entries; + u32 binding{}; + size_t offset{}; +}; + +inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Shadow1D: + return VideoCommon::ImageViewType::e1D; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ShadowArray1D: + return VideoCommon::ImageViewType::e1DArray; + case Shader::TextureType::Color2D: + case Shader::TextureType::Shadow2D: + return VideoCommon::ImageViewType::e2D; + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::ShadowArray2D: + return VideoCommon::ImageViewType::e2DArray; + case Shader::TextureType::Color3D: + case Shader::TextureType::Shadow3D: + return VideoCommon::ImageViewType::e3D; + case Shader::TextureType::ColorCube: + case Shader::TextureType::ShadowCube: + return VideoCommon::ImageViewType::Cube; + case Shader::TextureType::ColorArrayCube: + case Shader::TextureType::ShadowArrayCube: + return VideoCommon::ImageViewType::CubeArray; + } + UNREACHABLE_MSG("Invalid texture type {}", type); + return {}; +} + +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers, + const ImageId* image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) { + for (const auto& desc : info.texture_descriptors) { + const VkSampler sampler{samplers[index]}; + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + ++index; + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ef8bef6ff..6684d37a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -6,6 +6,7 @@ #include +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" @@ -17,140 +18,10 @@ namespace Vulkan { namespace { -vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { - boost::container::small_vector bindings; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for (const auto& desc : info.texture_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, - VkPipelineLayout pipeline_layout) { - boost::container::small_vector entries; - size_t offset{}; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - for (const auto& desc : info.texture_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(entries.size()), - .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, - .descriptorSetLayout = descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, - .pipelineLayout = pipeline_layout, - .set = 0, - }); -} - -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -VideoCommon::ImageViewType CastType(Shader::TextureType type) { - switch (type) { - case Shader::TextureType::Color1D: - case Shader::TextureType::Shadow1D: - return VideoCommon::ImageViewType::e1D; - case Shader::TextureType::ColorArray1D: - case Shader::TextureType::ShadowArray1D: - return VideoCommon::ImageViewType::e1DArray; - case Shader::TextureType::Color2D: - case Shader::TextureType::Shadow2D: - return VideoCommon::ImageViewType::e2D; - case Shader::TextureType::ColorArray2D: - case Shader::TextureType::ShadowArray2D: - return VideoCommon::ImageViewType::e2DArray; - case Shader::TextureType::Color3D: - case Shader::TextureType::Shadow3D: - return VideoCommon::ImageViewType::e3D; - case Shader::TextureType::ColorCube: - case Shader::TextureType::ShadowCube: - return VideoCommon::ImageViewType::Cube; - case Shader::TextureType::ColorArrayCube: - case Shader::TextureType::ShadowArrayCube: - return VideoCommon::ImageViewType::CubeArray; - } - UNREACHABLE_MSG("Invalid texture type {}", type); +DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) { + DescriptorLayoutBuilder builder; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + return builder.Create(device.GetLogical()); } } // Anonymous namespace @@ -158,37 +29,31 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip VKUpdateDescriptorQueue& update_descriptor_queue_, const Shader::Info& info_, vk::ShaderModule spv_module_) : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, - spv_module(std::move(spv_module_)), - descriptor_set_layout(CreateDescriptorSetLayout(device, info)), - descriptor_allocator(descriptor_pool, *descriptor_set_layout), - pipeline_layout{device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - })}, - descriptor_update_template{ - CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, - pipeline{device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - })} {} + spv_module(std::move(spv_module_)) { + DescriptorLayoutTuple tuple{CreateLayout(device, info)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); +} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); @@ -211,7 +76,7 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple static constexpr size_t max_elements = 64; std::array image_view_ids; boost::container::static_vector image_view_indices; - boost::container::static_vector sampler_handles; + boost::container::static_vector samplers; const auto& launch_desc{kepler_compute.launch_description}; const auto& cbufs{launch_desc.const_buffer_config}; @@ -228,20 +93,14 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); + samplers.push_back(sampler->Handle()); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); size_t index{}; - for (const auto& desc : info.texture_descriptors) { - const VkSampler vk_sampler{sampler_handles[index]}; - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; - update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); - ++index; - } + PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, + *update_descriptor_queue, index); } VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 08d73a2a4..e82e5816b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -9,7 +9,6 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_pipeline.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -18,7 +17,7 @@ namespace Vulkan { class Device; -class ComputePipeline : public Pipeline { +class ComputePipeline { public: explicit ComputePipeline() = default; explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp new file mode 100644 index 000000000..a2ec418b1 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -0,0 +1,445 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include +#include + +#include "common/bit_field.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { +namespace { +using boost::container::small_vector; +using boost::container::static_vector; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; + +DescriptorLayoutTuple CreateLayout(const Device& device, std::span infos) { + DescriptorLayoutBuilder builder; + for (size_t index = 0; index < infos.size(); ++index) { + static constexpr std::array stages{ + VK_SHADER_STAGE_VERTEX_BIT, + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, + VK_SHADER_STAGE_GEOMETRY_BIT, + VK_SHADER_STAGE_FRAGMENT_BIT, + }; + builder.Add(infos[index], stages.at(index)); + } + return builder.Create(device.GetLogical()); +} + +template +VkStencilOpState GetStencilFaceState(const StencilFace& face) { + return { + .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), + .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), + .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), + .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), + .compareMask = 0, + .writeMask = 0, + .reference = 0, + }; +} + +bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { + static constexpr std::array unsupported_topologies{ + VK_PRIMITIVE_TOPOLOGY_POINT_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT, + }; + return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); +} + +VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { + union Swizzle { + u32 raw; + BitField<0, 3, Maxwell::ViewportSwizzle> x; + BitField<4, 3, Maxwell::ViewportSwizzle> y; + BitField<8, 3, Maxwell::ViewportSwizzle> z; + BitField<12, 3, Maxwell::ViewportSwizzle> w; + }; + const Swizzle unpacked{swizzle}; + return VkViewportSwizzleNV{ + .x = MaxwellToVK::ViewportSwizzle(unpacked.x), + .y = MaxwellToVK::ViewportSwizzle(unpacked.y), + .z = MaxwellToVK::ViewportSwizzle(unpacked.z), + .w = MaxwellToVK::ViewportSwizzle(unpacked.w), + }; +} + +PixelFormat DecodeFormat(u8 encoded_format) { + const auto format{static_cast(encoded_format)}; + if (format == Tegra::RenderTargetFormat::NONE) { + return PixelFormat::Invalid; + } + return PixelFormatFromRenderTargetFormat(format); +} + +RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { + RenderPassKey key; + std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat); + if (state.depth_enabled != 0) { + const auto depth_format{static_cast(state.depth_format.Value())}; + key.depth_format = PixelFormatFromDepthFormat(depth_format); + } else { + key.depth_format = PixelFormat::Invalid; + } + key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); + return key; +} +} // Anonymous namespace + +GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, + BufferCache& buffer_cache_, TextureCache& texture_cache_, + const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache, + const FixedPipelineState& state, + std::array stages, + const std::array& infos) + : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, + buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, + update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + MakePipeline(device, state, render_pass); +} + +void GraphicsPipeline::Configure(bool is_indexed) { + static constexpr size_t max_images_elements = 64; + std::array image_view_ids; + static_vector image_view_indices; + static_vector samplers; + + texture_cache->SynchronizeGraphicsDescriptors(); + texture_cache->UpdateRenderTargets(false); + + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache->UnbindGraphicsStorageBuffers(stage); + size_t index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, + true); + ++index; + } + const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + for (const auto& desc : info.texture_descriptors) { + const u32 cbuf_index{desc.cbuf_index}; + const u32 cbuf_offset{desc.cbuf_offset}; + ASSERT(cbufs[cbuf_index].enabled); + const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; + const u32 raw_handle{gpu_memory->Read(addr)}; + + const TextureHandle handle(raw_handle, via_header_index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; + samplers.push_back(sampler->Handle()); + } + } + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + buffer_cache->UpdateGraphicsBuffers(is_indexed); + texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); + + buffer_cache->BindHostGeometryBuffers(is_indexed); + + size_t index{}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + buffer_cache->BindHostStageBuffers(stage); + PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), + *texture_cache, *update_descriptor_queue, index); + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + + scheduler->BindGraphicsPipeline(*pipeline); + scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + }); +} + +void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass) { + FixedPipelineState::DynamicState dynamic{}; + if (!device.IsExtExtendedDynamicStateSupported()) { + dynamic = state.dynamic_state; + } + static_vector vertex_bindings; + static_vector vertex_binding_divisors; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = state.binding_divisors[index] != 0; + const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ + .binding = static_cast(index), + .stride = dynamic.vertex_strides[index], + .inputRate = rate, + }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast(index), + .divisor = state.binding_divisors[index], + }); + } + } + static_vector vertex_attributes; + const auto& input_attributes = stage_infos[0].loads_generics; + for (size_t index = 0; index < state.attributes.size(); ++index) { + const auto& attribute = state.attributes[index]; + if (!attribute.enabled || !input_attributes[index]) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); + } + VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), + .pVertexBindingDescriptions = vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), + .pVertexAttributeDescriptions = vertex_attributes.data(), + }; + const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .vertexBindingDivisorCount = static_cast(vertex_binding_divisors.size()), + .pVertexBindingDivisors = vertex_binding_divisors.data(), + }; + if (!vertex_binding_divisors.empty()) { + vertex_input_ci.pNext = &input_divisor_ci; + } + const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), + .primitiveRestartEnable = state.primitive_restart_enable != 0 && + SupportsPrimitiveRestart(input_assembly_topology), + }; + const VkPipelineTessellationStateCreateInfo tessellation_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + }; + VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; + std::array swizzles; + std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewportSwizzles = swizzles.data(), + }; + if (device.IsNvViewportSwizzleSupported()) { + viewport_ci.pNext = &swizzle_ci; + } + + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = + static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + .rasterizerDiscardEnable = + static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = static_cast( + dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), + .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), + .depthBiasEnable = state.depth_bias_enable, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthTestEnable = dynamic.depth_test_enable, + .depthWriteEnable = dynamic.depth_write_enable, + .depthCompareOp = dynamic.depth_test_enable + ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) + : VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .stencilTestEnable = dynamic.stencil_enable, + .front = GetStencilFaceState(dynamic.front), + .back = GetStencilFaceState(dynamic.back), + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, + }; + static_vector cb_attachments; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + static constexpr std::array mask_table{ + VK_COLOR_COMPONENT_R_BIT, + VK_COLOR_COMPONENT_G_BIT, + VK_COLOR_COMPONENT_B_BIT, + VK_COLOR_COMPONENT_A_BIT, + }; + const auto format{static_cast(state.color_formats[index])}; + if (format == Tegra::RenderTargetFormat::NONE) { + continue; + } + const auto& blend{state.attachments[index]}; + const std::array mask{blend.Mask()}; + VkColorComponentFlags write_mask{}; + for (size_t i = 0; i < mask_table.size(); ++i) { + write_mask |= mask[i] ? mask_table[i] : 0; + } + cb_attachments.push_back({ + .blendEnable = blend.enable != 0, + .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), + .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), + .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), + .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), + .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), + .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), + .colorWriteMask = write_mask, + }); + } + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = static_cast(cb_attachments.size()), + .pAttachments = cb_attachments.data(), + .blendConstants = {}, + }; + static_vector dynamic_states{ + VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + if (device.IsExtExtendedDynamicStateSupported()) { + static constexpr std::array extended{ + VK_DYNAMIC_STATE_CULL_MODE_EXT, + VK_DYNAMIC_STATE_FRONT_FACE_EXT, + VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, + VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, + VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_OP_EXT, + }; + dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); + } + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + static_vector shader_stages; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + if (!spv_modules[stage]) { + continue; + } + [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); + /* + if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { + stage_ci.pNext = &subgroup_size_ci; + } + */ + } + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = &tessellation_ci, + .pViewportState = &viewport_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisample_ci, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h new file mode 100644 index 000000000..ba1d34a83 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class RenderPassCache; +class VKScheduler; +class VKUpdateDescriptorQueue; + +class GraphicsPipeline { + static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; + +public: + explicit GraphicsPipeline() = default; + explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, + TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, const FixedPipelineState& state, + std::array stages, + const std::array& infos); + + void Configure(bool is_indexed); + + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; + GraphicsPipeline(GraphicsPipeline&&) noexcept = default; + + GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; + GraphicsPipeline(const GraphicsPipeline&) = delete; + +private: + void MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass); + + Tegra::Engines::Maxwell3D* maxwell3d{}; + Tegra::MemoryManager* gpu_memory{}; + TextureCache* texture_cache{}; + BufferCache* buffer_cache{}; + VKScheduler* scheduler{}; + VKUpdateDescriptorQueue* update_descriptor_queue{}; + + std::array spv_modules; + std::array stage_infos; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h deleted file mode 100644 index b06288403..000000000 --- a/src/video_core/renderer_vulkan/vk_pipeline.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -class Pipeline { -public: - /// Add a reference count to the pipeline - void AddRef() noexcept { - ++ref_count; - } - - [[nodiscard]] bool RemoveRef() noexcept { - --ref_count; - return ref_count == 0; - } - - [[nodiscard]] u64 UsageTick() const noexcept { - return usage_tick; - } - -protected: - u64 usage_tick{}; - -private: - size_t ref_count{}; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 5477a2903..c9da2080d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,8 +12,11 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/environment.h" -#include "shader_recompiler/recompiler.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/program_header.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -34,18 +37,18 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -using Tegra::Engines::ShaderType; - namespace { -class Environment final : public Shader::Environment { +using Shader::Backend::SPIRV::EmitSPIRV; + +class GenericEnvironment : public Shader::Environment { public: - explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} {} - ~Environment() override = default; + ~GenericEnvironment() override = default; - [[nodiscard]] std::optional Analyze(u32 start_address) { + std::optional Analyze(u32 start_address) { const std::optional size{TryFindSize(start_address)}; if (!size) { return std::nullopt; @@ -55,52 +58,47 @@ public: return Common::CityHash128(reinterpret_cast(code.data()), code.size()); } - [[nodiscard]] size_t ShaderSize() const noexcept { + [[nodiscard]] size_t CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; + } + + [[nodiscard]] size_t ReadSize() const noexcept { return read_highest - read_lowest + INST_SIZE; } - [[nodiscard]] u128 ComputeHash() const { - const size_t size{ShaderSize()}; + [[nodiscard]] u128 CalculateHash() const { + const size_t size{ReadSize()}; auto data = std::make_unique(size); - gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); return Common::CityHash128(reinterpret_cast(data.get()), size); } - u64 ReadInstruction(u32 address) override { + u64 ReadInstruction(u32 address) final { read_lowest = std::min(read_lowest, address); read_highest = std::max(read_highest, address); if (address >= cached_lowest && address < cached_highest) { return code[address / INST_SIZE]; } - return gpu_memory.Read(program_base + address); - } - - u32 TextureBoundBuffer() override { - return kepler_compute.regs.tex_cb_index; - } - - std::array WorkgroupSize() override { - const auto& qmd{kepler_compute.launch_description}; - return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + return gpu_memory->Read(program_base + address); } -private: +protected: static constexpr size_t INST_SIZE = sizeof(u64); - static constexpr size_t BLOCK_SIZE = 0x1000; - static constexpr size_t MAXIMUM_SIZE = 0x100000; - static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + std::optional TryFindSize(GPUVAddr guest_addr) { + constexpr size_t BLOCK_SIZE = 0x1000; + constexpr size_t MAXIMUM_SIZE = 0x100000; + + constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - std::optional TryFindSize(u32 start_address) { - GPUVAddr guest_addr = program_base + start_address; size_t offset = 0; size_t size = BLOCK_SIZE; while (size <= MAXIMUM_SIZE) { code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; - gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { const u64 inst = data[i / INST_SIZE]; if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { @@ -114,17 +112,87 @@ private: return std::nullopt; } - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - GPUVAddr program_base; + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector code; - u32 read_lowest = 0; + u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; - std::vector code; u32 cached_lowest = std::numeric_limits::max(); u32 cached_highest = 0; }; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, + GPUVAddr program_base_, u32 start_offset) + : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + } + } + + ~GraphicsEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return maxwell3d->regs.tex_cb_index; + } + + std::array WorkgroupSize() override { + throw Shader::LogicError("Requesting workgroup size in a graphics stage"); + } + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { + stage = Shader::Stage::Compute; + } + + ~ComputeEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return kepler_compute->regs.tex_cb_index; + } + + std::array WorkgroupSize() override { + const auto& qmd{kepler_compute->launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -136,19 +204,67 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } +size_t GraphicsPipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); + return static_cast(hash); +} + +bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, Size()) == 0; +} + PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, + TextureCache& texture_cache_) : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ - update_descriptor_queue_} {} + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { + const auto& float_control{device.FloatControlProperties()}; + profile = Shader::Profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .has_broken_spirv_clamp = true, // TODO: is_intel + }; +} PipelineCache::~PipelineCache() = default; +GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { + MICROPROFILE_SCOPE(Vulkan_PipelineCache); + + if (!RefreshStages()) { + return nullptr; + } + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateGraphicsPipeline(); + return &pipeline; +} + ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); @@ -170,45 +286,130 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return &pipeline; } pipeline = CreateComputePipeline(shader); - shader->compute_users.push_back(key); return &pipeline; } +bool PipelineCache::RefreshStages() { + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { + graphics_key.unique_hashes[index] = u128{}; + continue; + } + const auto& shader_config{maxwell3d.regs.shader_config[index]}; + const auto program{static_cast(index)}; + const GPUVAddr shader_addr{base_addr + shader_config.offset}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return false; + } + const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; + if (!shader_info) { + const u32 offset{shader_config.offset}; + shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr); + } + graphics_key.unique_hashes[index] = shader_info->unique_hash; + } + return true; +} + +const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, + u32 start_address, VAddr cpu_addr) { + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + auto info = std::make_unique(); + if (const std::optional cached_hash{env.Analyze(start_address)}) { + info->unique_hash = *cached_hash; + info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + flow_block_pool.ReleaseContents(); + Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { + flow_block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + + std::array envs; + std::array programs; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + const auto program{static_cast(index)}; + GraphicsEnvironment& env{envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + + const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader); + Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset); + programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg); + } + std::array infos{}; + std::array modules; + + u32 binding{0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + GraphicsEnvironment& env{envs[index]}; + Shader::IR::Program& program{programs[index]}; + + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + std::vector code{EmitSPIRV(profile, env, program, binding)}; + + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + std::system("spirv-cross --vulkan-semantics D:\\shader.spv"); + + modules[stage_index] = BuildShader(device, code); + } + return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, + descriptor_pool, update_descriptor_queue, render_pass_cache, + graphics_key.state, std::move(modules), infos); +} + ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; - Environment env{kepler_compute, gpu_memory, program_base}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base}; if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto& float_control{device.FloatControlProperties()}; - const Shader::Profile profile{ - .unified_descriptor_binding = true, - .support_float_controls = true, - .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == - VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, - .support_separate_rounding_mode = - float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, - .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, - .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, - .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, - .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, - .support_fp16_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, - .support_fp32_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, - .has_broken_spirv_clamp = true, // TODO: is_intel - }; - const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; + flow_block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + + Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start}; + Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + u32 binding{0}; + std::vector code{EmitSPIRV(profile, env, program, binding)}; /* FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); fclose(file); std::system("spirv-dis D:\\shader.spv"); */ - shader_info->unique_hash = env.ComputeHash(); - shader_info->size_bytes = env.ShaderSize(); - return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, + shader_info->unique_hash = env.CalculateHash(); + shader_info->size_bytes = env.ReadSize(); + return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } @@ -216,9 +417,6 @@ ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_ ShaderInfo shader; ComputePipeline pipeline{CreateComputePipeline(&shader)}; const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; - shader.compute_users.push_back(key); - pipeline.AddRef(); - const size_t size_bytes{shader.size_bytes}; Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); return &compute_cache.emplace(key, std::move(pipeline)).first->second; @@ -233,18 +431,4 @@ ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) }; } -void PipelineCache::OnShaderRemoval(ShaderInfo* shader) { - for (const ComputePipelineCacheKey& key : shader->compute_users) { - const auto it = compute_cache.find(key); - ASSERT(it != compute_cache.end()); - - Pipeline& pipeline = it->second; - if (pipeline.RemoveRef()) { - // Wait for the pipeline to be free of GPU usage before destroying it - scheduler.Wait(pipeline.UsageTick()); - compute_cache.erase(it); - } - } -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eb35abc27..60fb976df 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -12,11 +12,18 @@ #include #include -#include - #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -26,13 +33,6 @@ class System; namespace Vulkan { -class Device; -class RasterizerVulkan; -class ComputePipeline; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; - using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { @@ -52,6 +52,26 @@ static_assert(std::has_unique_object_representations_v) static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); +struct GraphicsPipelineCacheKey { + std::array unique_hashes; + FixedPipelineState state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + } // namespace Vulkan namespace std { @@ -63,14 +83,28 @@ struct hash { } }; +template <> +struct hash { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; + } // namespace std namespace Vulkan { +class ComputePipeline; +class Device; +class RasterizerVulkan; +class RenderPassCache; +class VKDescriptorPool; +class VKScheduler; +class VKUpdateDescriptorQueue; + struct ShaderInfo { u128 unique_hash{}; size_t size_bytes{}; - std::vector compute_users; }; class PipelineCache final : public VideoCommon::ShaderCache { @@ -80,15 +114,23 @@ public: Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, BufferCache& buffer_cache, + TextureCache& texture_cache); ~PipelineCache() override; - [[nodiscard]] ComputePipeline* CurrentComputePipeline(); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); -protected: - void OnShaderRemoval(ShaderInfo* shader) override; + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: + bool RefreshStages(); + + const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, + u32 start_address, VAddr cpu_addr); + + GraphicsPipeline CreateGraphicsPipeline(); + ComputePipeline CreateComputePipeline(ShaderInfo* shader); ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); @@ -104,8 +146,20 @@ private: VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; + RenderPassCache& render_pass_cache; + BufferCache& buffer_cache; + TextureCache& texture_cache; + + GraphicsPipelineCacheKey graphics_key{}; std::unordered_map compute_cache; + std::unordered_map graphics_cache; + + Shader::ObjectPool inst_pool; + Shader::ObjectPool block_pool; + Shader::ObjectPool flow_block_pool; + + Shader::Profile profile; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c94419d29..036b531b9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -141,15 +141,18 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra blit_image(device, scheduler, state_tracker, descriptor_pool), astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, memory_allocator), - texture_cache_runtime{device, scheduler, memory_allocator, - staging_pool, blit_image, astc_decoder_pass}, + render_pass_cache(device), texture_cache_runtime{device, scheduler, + memory_allocator, staging_pool, + blit_image, astc_decoder_pass, + render_pass_cache}, texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, - descriptor_pool, update_descriptor_queue), - query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, + descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, + texture_cache), + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); @@ -158,7 +161,39 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { - UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); + MICROPROFILE_SCOPE(Vulkan_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + FlushWork(); + + query_cache.UpdateCounters(); + + GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; + if (!pipeline) { + return; + } + update_descriptor_queue.Acquire(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + pipeline->Configure(is_indexed); + + BeginTransformFeedback(); + + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + UpdateDynamicStates(); + + const auto& regs{maxwell3d.regs}; + const u32 num_instances{maxwell3d.mme_draw.instance_count}; + const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; + scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { + if (draw_params.is_indexed) { + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, + draw_params.base_vertex, draw_params.base_instance); + } else { + cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, + draw_params.base_vertex, draw_params.base_instance); + } + }); + EndTransformFeedback(); } void RasterizerVulkan::Clear() { @@ -487,13 +522,11 @@ void RasterizerVulkan::FlushWork() { if ((++draw_counter & 7) != 7) { return; } - if (draw_counter < DRAWS_TO_DISPATCH) { // Send recorded tasks to the worker thread scheduler.DispatchWork(); return; } - // Otherwise (every certain number of draws) flush execution. // This submits commands to the Vulkan driver. scheduler.Flush(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3fd03b915..88dbd753b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -23,6 +23,7 @@ #include "video_core/renderer_vulkan/vk_fence_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" @@ -148,6 +149,7 @@ private: VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; + RenderPassCache render_pass_cache; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp new file mode 100644 index 000000000..7e5ae43ea --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -0,0 +1,100 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { +namespace { +using VideoCore::Surface::PixelFormat; + +constexpr std::array ATTACHMENT_REFERENCES{ + VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, +}; + +VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, + VkSampleCountFlagBits samples) { + using MaxwellToVK::SurfaceFormat; + return { + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, + .samples = samples, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }; +} +} // Anonymous namespace + +RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} + +VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + const auto [pair, is_new] = cache.try_emplace(key); + if (!is_new) { + return *pair->second; + } + boost::container::static_vector descriptions; + u32 num_images{0}; + + for (size_t index = 0; index < key.color_formats.size(); ++index) { + const PixelFormat format{key.color_formats[index]}; + if (format == PixelFormat::Invalid) { + continue; + } + descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + ++num_images; + } + const size_t num_colors{descriptions.size()}; + const VkAttachmentReference* depth_attachment{}; + if (key.depth_format != PixelFormat::Invalid) { + depth_attachment = &ATTACHMENT_REFERENCES[num_colors]; + descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); + } + const VkSubpassDescription subpass{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast(num_colors), + .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = depth_attachment, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + pair->second = device->GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast(descriptions.size()), + .pAttachments = descriptions.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }); + return *pair->second; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h new file mode 100644 index 000000000..db8e83f1a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +struct RenderPassKey { + auto operator<=>(const RenderPassKey&) const noexcept = default; + + std::array color_formats; + VideoCore::Surface::PixelFormat depth_format; + VkSampleCountFlagBits samples; +}; + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { + size_t value = static_cast(key.depth_format) << 48; + value ^= static_cast(key.samples) << 52; + for (size_t i = 0; i < key.color_formats.size(); ++i) { + value ^= static_cast(key.color_formats[i]) << (i * 6); + } + return value; + } +}; +} // namespace std + +namespace Vulkan { + + class Device; + +class RenderPassCache { +public: + explicit RenderPassCache(const Device& device_); + + VkRenderPass Get(const RenderPassKey& key); + +private: + const Device* device{}; + std::unordered_map cache; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 88ccf96f5..1bbc542a1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -18,6 +18,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange; using VideoCore::Surface::IsPixelFormatASTC; namespace { - -constexpr std::array ATTACHMENT_REFERENCES{ - VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, -}; - constexpr VkBorderColor ConvertBorderColor(const std::array& color) { if (color == std::array{0, 0, 0, 0}) { return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; @@ -226,23 +214,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { } } -[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, - const ImageView* image_view) { - using MaxwellToVK::SurfaceFormat; - const PixelFormat pixel_format = image_view->format; - return VkAttachmentDescription{ - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, - .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format, - .samples = image_view->Samples(), - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }; -} - [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { switch (swizzle) { case SwizzleSource::Zero: @@ -1164,7 +1135,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { - std::vector descriptions; std::vector attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; @@ -1175,7 +1145,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::spanRenderTarget()); renderpass_key.color_formats[index] = color_buffer->format; num_layers = std::max(num_layers, color_buffer->range.extent.layers); @@ -1185,10 +1154,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::spanRenderTarget()); renderpass_key.depth_format = depth_buffer->format; num_layers = std::max(num_layers, depth_buffer->range.extent.layers); @@ -1201,40 +1167,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span(num_colors), - .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = depth_attachment, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = static_cast(descriptions.size()), - .pAttachments = descriptions.data(), - .subpassCount = 1, - .pSubpasses = &subpass, - .dependencyCount = 0, - .pDependencies = nullptr, - }); - } - renderpass = *cache_pair->second; + renderpass = runtime.render_pass_cache.Get(renderpass_key); + render_area = VkExtent2D{ .width = key.size.width, .height = key.size.height, }; num_color_buffers = static_cast(num_colors); - framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ + framebuffer = runtime.device.GetLogical().CreateFramebuffer({ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 172bcdf98..189ee5a68 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -26,35 +26,10 @@ class Device; class Image; class ImageView; class Framebuffer; +class RenderPassCache; class StagingBufferPool; class VKScheduler; -struct RenderPassKey { - constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; - - std::array color_formats; - PixelFormat depth_format; - VkSampleCountFlagBits samples; -}; - -} // namespace Vulkan - -namespace std { -template <> -struct hash { - [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { - size_t value = static_cast(key.depth_format) << 48; - value ^= static_cast(key.samples) << 52; - for (size_t i = 0; i < key.color_formats.size(); ++i) { - value ^= static_cast(key.color_formats[i]) << (i * 6); - } - return value; - } -}; -} // namespace std - -namespace Vulkan { - struct TextureCacheRuntime { const Device& device; VKScheduler& scheduler; @@ -62,7 +37,7 @@ struct TextureCacheRuntime { StagingBufferPool& staging_buffer_pool; BlitImageHelper& blit_image_helper; ASTCDecoderPass& astc_decoder_pass; - std::unordered_map renderpass_cache{}; + RenderPassCache& render_pass_cache; void Finish(); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 4887d6fd9..f0e5b098c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -49,6 +49,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, + VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, #ifdef _WIN32 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, #endif @@ -312,6 +313,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, host_query_reset); + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, + .pNext = nullptr, + .shaderDemoteToHelperInvocation = true, + }; + SetNext(next, demote); + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { float16_int8 = { @@ -597,8 +605,14 @@ void Device::CheckSuitability(bool requires_swapchain) const { throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } } + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{}; + demote.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; + demote.pNext = nullptr; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + robustness2.pNext = &demote; VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -625,6 +639,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), + std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), -- cgit v1.2.3 From 3d07cef009cf9e287744c7771c67166ef5761ce8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 23 Mar 2021 20:27:17 -0400 Subject: shader: Implement VOTE --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 4 ++ src/shader_recompiler/backend/spirv/emit_context.h | 3 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 9 ++++ src/shader_recompiler/backend/spirv/emit_spirv.h | 4 ++ .../backend/spirv/emit_spirv_vote.cpp | 58 ++++++++++++++++++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 16 ++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 5 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 6 +++ .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../frontend/maxwell/translate/impl/vote.cpp | 52 +++++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 6 +++ src/shader_recompiler/profile.h | 2 + src/shader_recompiler/shader_info.h | 1 + .../renderer_vulkan/vk_compute_pipeline.cpp | 7 ++- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 + src/video_core/vulkan_common/vulkan_device.cpp | 1 + src/video_core/vulkan_common/vulkan_device.h | 6 +++ 18 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 23cb523a8..086bdf8d0 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -15,6 +15,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_memory.cpp backend/spirv/emit_spirv_select.cpp backend/spirv/emit_spirv_undefined.cpp + backend/spirv/emit_spirv_vote.cpp environment.h exception.h file_environment.cpp @@ -122,6 +123,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/select_source_with_predicate.cpp frontend/maxwell/translate/impl/texture_fetch.cpp frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp + frontend/maxwell/translate/impl/vote.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4a4de3676..36f130781 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -259,6 +259,10 @@ void EmitContext::DefineInputs(const Info& info, Stage stage) { if (info.uses_local_invocation_id) { local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); } + if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) { + subgroup_local_invocation_id = + DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); + } if (info.loads_position) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 9b9e0d6b1..6e64360bf 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -82,6 +82,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id subgroup_local_invocation_id{}; Id instance_id{}; Id instance_index{}; Id base_instance{}; @@ -96,7 +97,7 @@ public: std::array output_generics{}; std::array frag_color{}; - Id frag_depth {}; + Id frag_depth{}; std::vector interfaces; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 93e851133..107403912 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -224,6 +224,15 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } + if (info.uses_subgroup_vote && profile.support_vote) { + ctx.AddExtension("SPV_KHR_shader_ballot"); + ctx.AddCapability(spv::Capability::SubgroupBallotKHR); + if (!profile.warp_size_potentially_larger_than_guest) { + // vote ops are only used when not taking the long path + ctx.AddExtension("SPV_KHR_subgroup_vote"); + ctx.AddCapability(spv::Capability::SubgroupVoteKHR); + } + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 960d022ff..ce23200f2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -346,5 +346,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id coords, Id dref, Id bias_lc, Id offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id lod_lc, Id offset); +Id EmitVoteAll(EmitContext& ctx, Id pred); +Id EmitVoteAny(EmitContext& ctx, Id pred); +Id EmitVoteEqual(EmitContext& ctx, Id pred); +Id EmitSubgroupBallot(EmitContext& ctx, Id pred); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp new file mode 100644 index 000000000..a63677ef2 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id LargeWarpBallot(EmitContext& ctx, Id ballot) { + const Id shift{ctx.Constant(ctx.U32[1], 5)}; + const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); +} +} // Anonymous namespace + +Id EmitVoteAll(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpIEqual(ctx.U1, lhs, active_mask); +} + +Id EmitVoteAny(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAnyKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); +} + +Id EmitVoteEqual(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), + ctx.OpIEqual(ctx.U1, lhs, active_mask)); +} + +Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { + const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); + } + return LargeWarpBallot(ctx, ballot); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 432dd29a5..ff2970125 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1444,4 +1444,20 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coor return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); } +U1 IREmitter::VoteAll(const U1& value) { + return Inst(Opcode::VoteAll, value); +} + +U1 IREmitter::VoteAny(const U1& value) { + return Inst(Opcode::VoteAny, value); +} + +U1 IREmitter::VoteEqual(const U1& value) { + return Inst(Opcode::VoteEqual, value); +} + +U32 IREmitter::SubgroupBallot(const U1& value) { + return Inst(Opcode::SubgroupBallot, value); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 346cef3ab..1708be3ef 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -234,6 +234,11 @@ public: const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] U1 VoteAll(const U1& value); + [[nodiscard]] U1 VoteAny(const U1& value); + [[nodiscard]] U1 VoteEqual(const U1& value); + [[nodiscard]] U32 SubgroupBallot(const U1& value); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index bdc07b9a7..fe888b8b2 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -355,3 +355,9 @@ OPCODE(ImageSampleImplicitLod, F32x4, U32, OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +// Vote operations +OPCODE(VoteAll, U1, U1, ) +OPCODE(VoteAny, U1, U1, ) +OPCODE(VoteEqual, U1, U1, ) +OPCODE(SubgroupBallot, U32, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 27b12ff3c..c0e36a7e2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -417,10 +417,6 @@ void TranslatorVisitor::VMNMX(u64) { ThrowNotImplemented(Opcode::VMNMX); } -void TranslatorVisitor::VOTE(u64) { - ThrowNotImplemented(Opcode::VOTE); -} - void TranslatorVisitor::VOTE_vtg(u64) { ThrowNotImplemented(Opcode::VOTE_vtg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..a88894a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -0,0 +1,52 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class VoteOp : u64 { + ALL, + ANY, + EQ, +}; + +[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { + switch (vote_op) { + case VoteOp::ALL: + return ir.VoteAll(pred); + case VoteOp::ANY: + return ir.VoteAny(pred); + case VoteOp::EQ: + return ir.VoteEqual(pred); + default: + throw NotImplementedException("Invalid VOTE op {}", vote_op); + } +} + +void Vote(TranslatorVisitor& v, u64 insn) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 3, IR::Pred> pred_a; + BitField<42, 1, u64> neg_pred_a; + BitField<45, 3, IR::Pred> pred_b; + BitField<48, 2, VoteOp> vote_op; + } const vote{insn}; + + const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; + v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); + v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); +} +} // Anonymous namespace + +void TranslatorVisitor::VOTE(u64 insn) { + Vote(*this, insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f44eac5d8..db5138e4d 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -359,6 +359,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::VoteAll: + case IR::Opcode::VoteAny: + case IR::Opcode::VoteEqual: + case IR::Opcode::SubgroupBallot: + info.uses_subgroup_vote = true; + break; default: break; } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3181c79fb..b57cbc310 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -19,6 +19,8 @@ struct Profile { bool support_fp16_signed_zero_nan_preserve{}; bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; + bool support_vote{}; + bool warp_size_potentially_larger_than_guest{}; // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f97730b34..3d9f04d1a 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -80,6 +80,7 @@ struct Info { bool uses_sampled_1d{}; bool uses_sparse_residency{}; bool uses_demote_to_helper_invocation{}; + bool uses_subgroup_vote{}; IR::Type used_constant_buffer_types{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6684d37a6..8e544d745 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -36,13 +36,18 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, .stage{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = *spv_module, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 41fc9588f..bdbc8dd1e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -455,6 +455,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_vote = true, + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f0e5b098c..009b74f12 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -737,6 +737,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { subgroup_properties.maxSubgroupSize >= GuestWarpSize) { extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; + ext_subgroup_size_control = true; } } else { is_warp_potentially_bigger = true; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 82bccc8f0..c268a4f8d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -193,6 +193,11 @@ public: return ext_shader_viewport_index_layer; } + /// Returns true if the device supports VK_EXT_subgroup_size_control. + bool IsExtSubgroupSizeControlSupported() const { + return ext_subgroup_size_control; + } + /// Returns true if the device supports VK_EXT_transform_feedback. bool IsExtTransformFeedbackSupported() const { return ext_transform_feedback; @@ -297,6 +302,7 @@ private: bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. -- cgit v1.2.3 From e860870dd2244cd87645190c89244f1d2c4c775b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Mar 2021 19:53:34 -0300 Subject: shader: Implement LDS, STS, LDL, and STS and use SPIR-V 1.4 when available --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 115 +++++++++++- src/shader_recompiler/backend/spirv/emit_context.h | 21 +++ src/shader_recompiler/backend/spirv/emit_spirv.h | 14 ++ .../backend/spirv/emit_spirv_context_get_set.cpp | 10 ++ .../backend/spirv/emit_spirv_shared_memory.cpp | 175 ++++++++++++++++++ src/shader_recompiler/environment.h | 4 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 46 +++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 6 + .../frontend/ir/microinstruction.cpp | 6 + src/shader_recompiler/frontend/ir/opcodes.inc | 18 ++ src/shader_recompiler/frontend/ir/program.h | 2 + src/shader_recompiler/frontend/maxwell/program.cpp | 2 + .../translate/impl/load_store_local_shared.cpp | 197 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 -- .../ir_opt/collect_shader_info_pass.cpp | 6 + src/shader_recompiler/profile.h | 3 + .../renderer_vulkan/vk_pipeline_cache.cpp | 47 ++++- src/video_core/vulkan_common/vulkan_device.cpp | 34 ++++ src/video_core/vulkan_common/vulkan_device.h | 42 +++-- 20 files changed, 730 insertions(+), 36 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 55b846c84..003cbefb1 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -14,6 +14,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp backend/spirv/emit_spirv_select.cpp + backend/spirv/emit_spirv_shared_memory.cpp backend/spirv/emit_spirv_special.cpp backend/spirv/emit_spirv_undefined.cpp backend/spirv/emit_spirv_warp.cpp @@ -111,6 +112,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/load_constant.cpp frontend/maxwell/translate/impl/load_effective_address.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp + frontend/maxwell/translate/impl/load_store_local_shared.cpp frontend/maxwell/translate/impl/load_store_memory.cpp frontend/maxwell/translate/impl/logic_operation.cpp frontend/maxwell/translate/impl/logic_operation_three_input.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index a8ca33c1d..96d0e9b4d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -9,6 +9,7 @@ #include #include "common/common_types.h" +#include "common/div_ceil.h" #include "shader_recompiler/backend/spirv/emit_context.h" namespace Shader::Backend::SPIRV { @@ -96,11 +97,13 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) - : Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} { + : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); DefineInterfaces(program.info); + DefineLocalMemory(program); + DefineSharedMemory(program); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextures(program.info, binding); @@ -143,6 +146,8 @@ void EmitContext::DefineCommonTypes(const Info& info) { F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); + private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32"); + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); @@ -184,6 +189,105 @@ void EmitContext::DefineInterfaces(const Info& info) { DefineOutputs(info); } +void EmitContext::DefineLocalMemory(const IR::Program& program) { + if (program.local_memory_size == 0) { + return; + } + const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)}; + const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; + const Id pointer{TypePointer(spv::StorageClass::Private, type)}; + local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(local_memory); + } +} + +void EmitContext::DefineSharedMemory(const IR::Program& program) { + if (program.shared_memory_size == 0) { + return; + } + const auto make{[&](Id element_type, u32 element_size) { + const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)}; + const Id array_type{TypeArray(element_type, Constant(U32[1], num_elements))}; + Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{TypeStruct(array_type)}; + MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U); + Decorate(struct_type, spv::Decoration::Block); + + const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; + const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; + const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; + Decorate(variable, spv::Decoration::Aliased); + interfaces.push_back(variable); + + return std::make_pair(variable, element_pointer); + }}; + if (profile.support_explicit_workgroup_layout) { + AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); + if (program.info.uses_int8) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); + std::tie(shared_memory_u8, shared_u8) = make(U8, 1); + } + if (program.info.uses_int16) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); + std::tie(shared_memory_u16, shared_u16) = make(U16, 2); + } + std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4); + std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8); + std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16); + } + const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; + const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; + const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)}; + shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); + shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup); + interfaces.push_back(shared_memory_u32); + + const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; + const auto make_function{[&](u32 mask, u32 size) { + const Id loop_header{OpLabel()}; + const Id continue_block{OpLabel()}; + const Id merge_block{OpLabel()}; + + const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id insert_value{OpFunctionParameter(U32[1])}; + AddLabel(); + OpBranch(loop_header); + + AddLabel(loop_header); + const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; + const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Constant(U32[1], 3U))}; + const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Constant(U32[1], mask))}; + const Id count{Constant(U32[1], size)}; + OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + OpBranch(continue_block); + + AddLabel(continue_block); + const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; + const Id old_value{OpLoad(U32[1], word_pointer)}; + const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Constant(U32[1], 1U), + u32_zero_value, u32_zero_value, new_value, + old_value)}; + const Id success{OpIEqual(U1, atomic_res, old_value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturn(); + OpFunctionEnd(); + return func; + }}; + if (program.info.uses_int8) { + shared_store_u8_func = make_function(24, 8); + } + if (program.info.uses_int16) { + shared_store_u16_func = make_function(16, 16); + } +} + void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; @@ -234,6 +338,9 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("ssbo{}", index)); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } std::fill_n(ssbos.data() + index, desc.count, id); index += desc.count; binding += desc.count; @@ -261,6 +368,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { .image_type{image_type}, }); } + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } binding += desc.count; } } @@ -363,6 +473,9 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions: for (size_t i = 0; i < desc.count; ++i) { cbufs[desc.index + i].*member_type = id; } + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } binding += desc.count; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 01b7b665d..1a4e8221a 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -73,6 +73,14 @@ public: UniformDefinitions uniform_types; + Id private_u32{}; + + Id shared_u8{}; + Id shared_u16{}; + Id shared_u32{}; + Id shared_u32x2{}; + Id shared_u32x4{}; + Id input_f32{}; Id input_u32{}; Id input_s32{}; @@ -96,6 +104,17 @@ public: Id base_vertex{}; Id front_face{}; + Id local_memory{}; + + Id shared_memory_u8{}; + Id shared_memory_u16{}; + Id shared_memory_u32{}; + Id shared_memory_u32x2{}; + Id shared_memory_u32x4{}; + + Id shared_store_u8_func{}; + Id shared_store_u16_func{}; + Id input_position{}; std::array input_generics{}; @@ -111,6 +130,8 @@ private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineInterfaces(const Info& info); + void DefineLocalMemory(const IR::Program& program); + void DefineSharedMemory(const IR::Program& program); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 837f0e858..4f62af959 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -58,6 +58,8 @@ void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitLoadLocal(EmitContext& ctx, Id word_offset); +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); Id EmitUndefU1(EmitContext& ctx); Id EmitUndefU8(EmitContext& ctx); Id EmitUndefU16(EmitContext& ctx); @@ -94,6 +96,18 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va Id value); void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); +Id EmitLoadSharedU8(EmitContext& ctx, Id offset); +Id EmitLoadSharedS8(EmitContext& ctx, Id offset); +Id EmitLoadSharedU16(EmitContext& ctx, Id offset); +Id EmitLoadSharedS16(EmitContext& ctx, Id offset); +Id EmitLoadSharedU32(EmitContext& ctx, Id offset); +Id EmitLoadSharedU64(EmitContext& ctx, Id offset); +Id EmitLoadSharedU128(EmitContext& ctx, Id offset); +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 4cbc2aec1..52dcef8a4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -238,4 +238,14 @@ Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } +Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { + const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; + return ctx.OpLoad(ctx.U32[1], pointer); +} + +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) { + const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; + ctx.OpStore(pointer, value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp new file mode 100644 index 000000000..fa2fc9ab4 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -0,0 +1,175 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { + const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); +} + +Id Word(EmitContext& ctx, Id offset) { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + return ctx.OpLoad(ctx.U32[1], pointer); +} + +std::pair ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { + const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))}; + const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))}; + const Id count_id{ctx.Constant(ctx.U32[1], count)}; + return {bit, count_id}; +} +} // Anonymous namespace + +Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; + return ctx.OpLoad(ctx.U32[1], pointer); + } else { + return Word(ctx, offset); + } +} + +Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; + return ctx.OpLoad(ctx.U32[2], pointer); + } else { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; + return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), + ctx.OpLoad(ctx.U32[1], rhs_pointer)); + } +} + +Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; + return ctx.OpLoad(ctx.U32[4], pointer); + } + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + std::array values{}; + for (u32 i = 0; i < 4; ++i) { + const Id index{i == 0 ? base_index + : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + values[i] = ctx.OpLoad(ctx.U32[1], pointer); + } + return ctx.OpCompositeConstruct(ctx.U32[4], values); +} + +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value); + } +} + +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value); + } +} + +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { + Id pointer{}; + if (ctx.profile.support_explicit_workgroup_layout) { + pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); + } else { + const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); + } + ctx.OpStore(pointer, value); +} + +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; + ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); + ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); +} + +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + for (u32 i = 0; i < 4; ++i) { + const Id index{i == 0 ? base_index + : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0c62c1c54..9415d02f6 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -19,6 +19,10 @@ public: [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; + [[nodiscard]] virtual u32 LocalMemorySize() const = 0; + + [[nodiscard]] virtual u32 SharedMemorySize() const = 0; + [[nodiscard]] virtual std::array WorkgroupSize() const = 0; [[nodiscard]] const ProgramHeader& SPH() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6d41442ee..d6a1d8ec2 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,52 @@ void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) { Inst(Opcode::WriteGlobal128, address, vector); } +U32 IREmitter::LoadLocal(const IR::U32& word_offset) { + return Inst(Opcode::LoadLocal, word_offset); +} + +void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) { + Inst(Opcode::WriteLocal, word_offset, value); +} + +Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) { + switch (bit_size) { + case 8: + return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); + case 16: + return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); + case 32: + return Inst(Opcode::LoadSharedU32, offset); + case 64: + return Inst(Opcode::LoadSharedU64, offset); + case 128: + return Inst(Opcode::LoadSharedU128, offset); + } + throw InvalidArgument("Invalid bit size {}", bit_size); +} + +void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) { + switch (bit_size) { + case 8: + Inst(Opcode::WriteSharedU8, offset, value); + break; + case 16: + Inst(Opcode::WriteSharedU16, offset, value); + break; + case 32: + Inst(Opcode::WriteSharedU32, offset, value); + break; + case 64: + Inst(Opcode::WriteSharedU64, offset, value); + break; + case 128: + Inst(Opcode::WriteSharedU128, offset, value); + break; + default: + throw InvalidArgument("Invalid bit size {}", bit_size); + } +} + U1 IREmitter::GetZeroFromOp(const Value& op) { return Inst(Opcode::GetZeroFromOp, op); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8d50aa607..842c2bdaf 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -99,6 +99,12 @@ public: void WriteGlobal64(const U64& address, const IR::Value& vector); void WriteGlobal128(const U64& address, const IR::Value& vector); + [[nodiscard]] U32 LoadLocal(const U32& word_offset); + void WriteLocal(const U32& word_offset, const U32& value); + + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); + void WriteShared(int bit_size, const U32& offset, const Value& value); + [[nodiscard]] U1 GetZeroFromOp(const Value& op); [[nodiscard]] U1 GetSignFromOp(const Value& op); [[nodiscard]] U1 GetCarryFromOp(const Value& op); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index be8eb4d4c..52a5e5034 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -76,6 +76,12 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteStorage32: case Opcode::WriteStorage64: case Opcode::WriteStorage128: + case Opcode::WriteLocal: + case Opcode::WriteSharedU8: + case Opcode::WriteSharedU16: + case Opcode::WriteSharedU32: + case Opcode::WriteSharedU64: + case Opcode::WriteSharedU128: return true; default: return false; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 5d7462d76..c75658328 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -89,6 +89,24 @@ OPCODE(WriteStorage32, Void, U32, OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) +// Local memory operations +OPCODE(LoadLocal, U32, U32, ) +OPCODE(WriteLocal, Void, U32, U32, ) + +// Shared memory operations +OPCODE(LoadSharedU8, U32, U32, ) +OPCODE(LoadSharedS8, U32, U32, ) +OPCODE(LoadSharedU16, U32, U32, ) +OPCODE(LoadSharedS16, U32, U32, ) +OPCODE(LoadSharedU32, U32, U32, ) +OPCODE(LoadSharedU64, U32x2, U32, ) +OPCODE(LoadSharedU128, U32x4, U32, ) +OPCODE(WriteSharedU8, Void, U32, U32, ) +OPCODE(WriteSharedU16, Void, U32, U32, ) +OPCODE(WriteSharedU32, Void, U32, U32, ) +OPCODE(WriteSharedU64, Void, U32, U32x2, ) +OPCODE(WriteSharedU128, Void, U32, U32x4, ) + // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 0162e919c..3a37b3ab9 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -21,6 +21,8 @@ struct Program { Info info; Stage stage{}; std::array workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index a914a91f4..7b08f11b0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool offset_reg; + BitField<20, 24, u64> absolute_offset; + BitField<20, 24, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast(encoding.absolute_offset)); + } else { + const s32 relative{static_cast(encoding.relative_offset.Value())}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +std::pair GetSize(u64 insn) { + union { + u64 raw; + BitField<48, 3, Size> size; + } const encoding{insn}; + + const Size nnn = encoding.size; + switch (encoding.size) { + case Size::U8: + return {8, false}; + case Size::S8: + return {8, true}; + case Size::U16: + return {16, false}; + case Size::S16: + return {16, true}; + case Size::B32: + return {32, false}; + case Size::B64: + return {64, false}; + case Size::B128: + return {128, false}; + default: + throw NotImplementedException("Invalid size {}", encoding.size.Value()); + } +} + +IR::Reg Reg(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> reg; + } const encoding{insn}; + + return encoding.reg; +} + +IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); +} + +IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed)); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed)); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + X(dest, ir.LoadLocal(word_offset)); + for (int i = 1; i < bit_size / 32; ++i) { + X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i)))); + } + break; + } +} + +void TranslatorVisitor::LDS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; + switch (bit_size) { + case 8: + case 16: + case 32: + X(dest, IR::U32{value}); + break; + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + for (int element = 0; element < bit_size / 32; ++element) { + X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + } + break; + } +} + +void TranslatorVisitor::STL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg reg{Reg(insn)}; + const IR::U32 src{X(reg)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; + ir.WriteLocal(word_offset, value); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; + ir.WriteLocal(word_offset, value); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(reg, bit_size / 32)) { + throw NotImplementedException("Unaligned source register"); + } + ir.WriteLocal(word_offset, src); + for (int i = 1; i < bit_size / 32; ++i) { + ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); + } + break; + } +} + +void TranslatorVisitor::STS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg reg{Reg(insn)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: + case 16: + case 32: + ir.WriteShared(bit_size, offset, X(reg)); + break; + case 64: + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); + break; + case 128: { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; + ir.WriteShared(128, offset, vector); + break; + } + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 409216640..b62d8ee2a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) { ThrowNotImplemented(Opcode::LD); } -void TranslatorVisitor::LDL(u64) { - ThrowNotImplemented(Opcode::LDL); -} - -void TranslatorVisitor::LDS(u64) { - ThrowNotImplemented(Opcode::LDS); -} - void TranslatorVisitor::LEPC(u64) { ThrowNotImplemented(Opcode::LEPC); } @@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) { ThrowNotImplemented(Opcode::ST); } -void TranslatorVisitor::STL(u64) { - ThrowNotImplemented(Opcode::STL); -} - void TranslatorVisitor::STP(u64) { ThrowNotImplemented(Opcode::STP); } -void TranslatorVisitor::STS(u64) { - ThrowNotImplemented(Opcode::STS); -} - void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 60be67228..c932c307b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -200,6 +200,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorageS8: case IR::Opcode::WriteStorageU8: case IR::Opcode::WriteStorageS8: + case IR::Opcode::LoadSharedU8: + case IR::Opcode::LoadSharedS8: + case IR::Opcode::WriteSharedU8: case IR::Opcode::SelectU8: case IR::Opcode::ConvertF16S8: case IR::Opcode::ConvertF16U8: @@ -224,6 +227,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorageS16: case IR::Opcode::WriteStorageU16: case IR::Opcode::WriteStorageS16: + case IR::Opcode::LoadSharedU16: + case IR::Opcode::LoadSharedS16: + case IR::Opcode::WriteSharedU16: case IR::Opcode::SelectU16: case IR::Opcode::BitCastU16F16: case IR::Opcode::BitCastF16U16: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index e26047751..0276fc23b 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -18,6 +18,8 @@ enum class AttributeType : u8 { }; struct Profile { + u32 supported_spirv{0x00010000}; + bool unified_descriptor_binding{}; bool support_vertex_instance_id{}; bool support_float_controls{}; @@ -30,6 +32,7 @@ struct Profile { bool support_fp16_signed_zero_nan_preserve{}; bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; + bool support_explicit_workgroup_layout{}; bool support_vote{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 69dd945b2..0d6a32bfd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -114,10 +114,12 @@ public: gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); const u64 num_texture_types{static_cast(texture_types.size())}; + const u32 local_memory_size{LocalMemorySize()}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -132,7 +134,10 @@ public: file.flush(); if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; - file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + const u32 shared_memory_size{SharedMemorySize()}; + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .write(reinterpret_cast(&shared_memory_size), + sizeof(shared_memory_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); } @@ -278,6 +283,16 @@ public: return maxwell3d->regs.tex_cb_index; } + u32 LocalMemorySize() const override { + const u64 size{sph.LocalMemorySize()}; + ASSERT(size <= std::numeric_limits::max()); + return static_cast(size); + } + + u32 SharedMemorySize() const override { + throw Shader::LogicError("Requesting shared memory size in graphics stage"); + } + std::array WorkgroupSize() const override { throw Shader::LogicError("Requesting workgroup size in a graphics stage"); } @@ -313,6 +328,16 @@ public: return kepler_compute->regs.tex_cb_index; } + u32 LocalMemorySize() const override { + const auto& qmd{kepler_compute->launch_description}; + return qmd.local_pos_alloc; + } + + u32 SharedMemorySize() const override { + const auto& qmd{kepler_compute->launch_description}; + return qmd.shared_alloc; + } + std::array WorkgroupSize() const override { const auto& qmd{kepler_compute->launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; @@ -366,6 +391,7 @@ public: u64 num_texture_types{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -381,7 +407,8 @@ public: texture_types.emplace(key, type); } if (stage == Shader::Stage::Compute) { - file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.read(reinterpret_cast(&sph), sizeof(sph)); } @@ -402,6 +429,14 @@ public: return it->second; } + u32 LocalMemorySize() const override { + return local_memory_size; + } + + u32 SharedMemorySize() const override { + return shared_memory_size; + } + u32 TextureBoundBuffer() const override { return texture_bound; } @@ -414,6 +449,8 @@ private: std::unique_ptr code; std::unordered_map texture_types; std::array workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; u32 texture_bound{}; u32 read_lowest{}; u32 read_highest{}; @@ -541,6 +578,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ + .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_vertex_instance_id = false, .support_float_controls = true, @@ -558,6 +596,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, @@ -600,8 +639,8 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { shader = MakeShaderInfo(env, *cpu_shader_addr); } const ComputePipelineCacheKey key{ - .unique_hash = shader->unique_hash, - .shared_memory_size = qmd.shared_alloc, + .unique_hash{shader->unique_hash}, + .shared_memory_size{qmd.shared_alloc}, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 009b74f12..c027598ba 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -399,6 +399,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; + if (khr_workgroup_memory_explicit_layout) { + workgroup_layout = { + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, + .pNext = nullptr, + .workgroupMemoryExplicitLayout = VK_TRUE, + .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE, + .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE, + .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE, + }; + SetNext(next, workgroup_layout); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -662,6 +676,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { } bool has_khr_shader_float16_int8{}; + bool has_khr_workgroup_memory_explicit_layout{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; @@ -682,6 +697,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); + test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); @@ -694,6 +710,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_khr_workgroup_memory_explicit_layout, + VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -787,6 +805,22 @@ std::vector Device::LoadExtensions(bool requires_surface) { ext_extended_dynamic_state = true; } } + if (has_khr_workgroup_memory_explicit_layout) { + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; + layout.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; + layout.pNext = nullptr; + features.pNext = &layout; + physical.GetFeatures2KHR(features); + + if (layout.workgroupMemoryExplicitLayout && + layout.workgroupMemoryExplicitLayout8BitAccess && + layout.workgroupMemoryExplicitLayout16BitAccess && + layout.workgroupMemoryExplicitLayoutScalarBlockLayout) { + extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); + khr_workgroup_memory_explicit_layout = true; + } + } return extensions; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index c268a4f8d..ac2311e7e 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -168,11 +168,21 @@ public: return nv_viewport_swizzle; } - /// Returns true if the device supports VK_EXT_scalar_block_layout. + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; } + /// Returns true if the device supports VK_KHR_spirv_1_4. + bool IsKhrSpirv1_4Supported() const { + return khr_spirv_1_4; + } + + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. + bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { + return khr_workgroup_memory_explicit_layout; + } + /// Returns true if the device supports VK_EXT_index_type_uint8. bool IsExtIndexTypeUint8Supported() const { return ext_index_type_uint8; @@ -296,20 +306,22 @@ private: bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. - bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. - bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. - bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. - bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. - bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached + bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. + bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. + bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. + bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. + bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. + bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. + bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. + bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. + bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. + bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. + bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Telemetry parameters std::string vendor_name; ///< Device's driver name. -- cgit v1.2.3 From 479ca00071ccaab6ca9ac28daf375e1ed15dc447 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:50:30 -0300 Subject: nsight_aftermath_tracker: Report used shaders to Nsight Aftermath --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 4 +++- .../vulkan_common/nsight_aftermath_tracker.cpp | 5 ++--- .../vulkan_common/nsight_aftermath_tracker.h | 21 +++++++++++---------- src/video_core/vulkan_common/vulkan_device.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 3 ++- 6 files changed, 20 insertions(+), 16 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 2cfe9d4bd..ec9866605 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,6 +206,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); + device.SaveShader(code); pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 25dbefd5c..f699a9bdf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -770,6 +770,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const Shader::Profile profile{MakeProfile(key, program.stage)}; const std::vector code{EmitSPIRV(profile, program, binding)}; + device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], @@ -846,7 +847,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(base_profile, program, binding)}; + const std::vector code{EmitSPIRV(base_profile, program, binding)}; + device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 758c038ba..209cb1e0a 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -73,12 +73,11 @@ NsightAftermathTracker::~NsightAftermathTracker() { } } -void NsightAftermathTracker::SaveShader(const std::vector& spirv) const { +void NsightAftermathTracker::SaveShader(std::span spirv) const { if (!initialized) { return; } - - std::vector spirv_copy = spirv; + std::vector spirv_copy(spirv.begin(), spirv.end()); GFSDK_Aftermath_SpirvCode shader; shader.pData = spirv_copy.data(); shader.size = static_cast(spirv_copy.size() * 4); diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h index 4fe2b14d9..eae1891dd 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -33,7 +34,7 @@ public: NsightAftermathTracker(NsightAftermathTracker&&) = delete; NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; - void SaveShader(const std::vector& spirv) const; + void SaveShader(std::span spirv) const; private: #ifdef HAS_NSIGHT_AFTERMATH @@ -61,21 +62,21 @@ private: bool initialized = false; Common::DynamicLibrary dl; - PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; - PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; - PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; - PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; - PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; - PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; + PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier{}; + PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv{}; + PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON{}; #endif }; #ifndef HAS_NSIGHT_AFTERMATH inline NsightAftermathTracker::NsightAftermathTracker() = default; inline NsightAftermathTracker::~NsightAftermathTracker() = default; -inline void NsightAftermathTracker::SaveShader(const std::vector&) const {} +inline void NsightAftermathTracker::SaveShader(std::span) const {} #endif } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index c027598ba..78bb741bc 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -493,7 +493,7 @@ void Device::ReportLoss() const { std::this_thread::sleep_for(std::chrono::seconds{15}); } -void Device::SaveShader(const std::vector& spirv) const { +void Device::SaveShader(std::span spirv) const { if (nsight_aftermath_tracker) { nsight_aftermath_tracker->SaveShader(spirv); } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index ac2311e7e..adf62a707 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "common/common_types.h" @@ -43,7 +44,7 @@ public: void ReportLoss() const; /// Reports a shader to Nsight Aftermath. - void SaveShader(const std::vector& spirv) const; + void SaveShader(std::span spirv) const; /// Returns the name of the VkDriverId reported from Vulkan. std::string GetDriverName() const; -- cgit v1.2.3 From 3db2b3effa953ae66457b7a19b419fc4db2c4801 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Apr 2021 02:07:02 -0400 Subject: shader: Implement ATOM/S and RED --- src/shader_recompiler/CMakeLists.txt | 3 + .../backend/spirv/emit_context.cpp | 158 +++++- src/shader_recompiler/backend/spirv/emit_context.h | 20 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 3 + src/shader_recompiler/backend/spirv/emit_spirv.h | 95 ++++ .../backend/spirv/emit_spirv_atomic.cpp | 528 +++++++++++++++++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 200 +++++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 39 ++ .../frontend/ir/microinstruction.cpp | 66 +++ src/shader_recompiler/frontend/ir/opcodes.inc | 70 +++ .../impl/atomic_operations_global_memory.cpp | 222 +++++++++ .../impl/atomic_operations_shared_memory.cpp | 110 +++++ .../maxwell/translate/impl/not_implemented.cpp | 12 - .../ir_opt/collect_shader_info_pass.cpp | 70 +++ .../global_memory_to_storage_buffer_pass.cpp | 121 ++++- .../ir_opt/lower_fp16_to_fp32.cpp | 12 + src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 13 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 14 + src/video_core/vulkan_common/vulkan_device.h | 6 + 21 files changed, 1745 insertions(+), 19 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 8e1d37373..7b9f08aa0 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -3,6 +3,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp backend/spirv/emit_spirv.h + backend/spirv/emit_spirv_atomic.cpp backend/spirv/emit_spirv_barriers.cpp backend/spirv/emit_spirv_bitwise_conversion.cpp backend/spirv/emit_spirv_composite.cpp @@ -65,6 +66,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/program.h frontend/maxwell/structured_control_flow.cpp frontend/maxwell/structured_control_flow.h + frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp + frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp frontend/maxwell/translate/impl/barrier_operations.cpp frontend/maxwell/translate/impl/bitfield_extract.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 32f8c4508..e5d83e9b4 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,6 +15,53 @@ namespace Shader::Backend::SPIRV { namespace { +enum class CasFunctionType { + Increment, + Decrement, + FPAdd, + FPMin, + FPMax, +}; + +Id CasFunction(EmitContext& ctx, CasFunctionType function_type, Id value_type) { + const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id op_a{ctx.OpFunctionParameter(value_type)}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + ctx.AddLabel(); + Id result{}; + switch (function_type) { + case CasFunctionType::Increment: { + const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; + const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); + break; + } + case CasFunctionType::Decrement: { + const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; + const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; + const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; + const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, op_b, decr); + break; + } + case CasFunctionType::FPAdd: + result = ctx.OpFAdd(value_type, op_a, op_b); + break; + case CasFunctionType::FPMin: + result = ctx.OpFMin(value_type, op_a, op_b); + break; + case CasFunctionType::FPMax: + result = ctx.OpFMax(value_type, op_a, op_b); + break; + default: + break; + } + ctx.OpReturnValue(result); + ctx.OpFunctionEnd(); + return func; +} + Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; const Id type{ctx.F32[1]}; @@ -196,6 +243,56 @@ Id EmitContext::Def(const IR::Value& value) { } } +Id EmitContext::CasLoop(Id function, CasPointerType pointer_type, Id value_type) { + const Id loop_header{OpLabel()}; + const Id continue_block{OpLabel()}; + const Id merge_block{OpLabel()}; + const Id storage_type{pointer_type == CasPointerType::Shared ? shared_memory_u32_type + : storage_memory_u32}; + const Id func_type{TypeFunction(value_type, U32[1], value_type, storage_type)}; + const Id func{OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id index{OpFunctionParameter(U32[1])}; + const Id op_b{OpFunctionParameter(value_type)}; + const Id base{OpFunctionParameter(storage_type)}; + AddLabel(); + const Id one{Constant(U32[1], 1)}; + OpBranch(loop_header); + AddLabel(loop_header); + OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + OpBranch(continue_block); + + AddLabel(continue_block); + const Id word_pointer{pointer_type == CasPointerType::Shared + ? OpAccessChain(shared_u32, base, index) + : OpAccessChain(storage_u32, base, u32_zero_value, index)}; + if (value_type.value == F32[2].value) { + const Id u32_value{OpLoad(U32[1], word_pointer)}; + const Id value{OpUnpackHalf2x16(F32[2], u32_value)}; + const Id new_value{OpFunctionCall(value_type, function, value, op_b)}; + const Id u32_new_value{OpPackHalf2x16(U32[1], new_value)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, + u32_zero_value, u32_new_value, u32_value)}; + const Id success{OpIEqual(U1, atomic_res, u32_value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturnValue(OpUnpackHalf2x16(F32[2], atomic_res)); + } else { + const Id value{OpLoad(U32[1], word_pointer)}; + const Id new_value{OpBitcast( + U32[1], OpFunctionCall(value_type, function, OpBitcast(value_type, value), op_b))}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, + u32_zero_value, new_value, value)}; + const Id success{OpIEqual(U1, atomic_res, value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturnValue(OpBitcast(value_type, atomic_res)); + } + OpFunctionEnd(); + return func; +} + void EmitContext::DefineCommonTypes(const Info& info) { void_id = TypeVoid(); @@ -300,9 +397,9 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { } const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; - const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)}; + shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); - shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup); + shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); interfaces.push_back(shared_memory_u32); const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; @@ -346,6 +443,14 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { if (program.info.uses_int16) { shared_store_u16_func = make_function(16, 16); } + if (program.info.uses_shared_increment) { + const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; + increment_cas_shared = CasLoop(inc_func, CasPointerType::Shared, U32[1]); + } + if (program.info.uses_shared_decrement) { + const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; + decrement_cas_shared = CasLoop(dec_func, CasPointerType::Shared, U32[1]); + } } void EmitContext::DefineAttributeMemAccess(const Info& info) { @@ -530,12 +635,12 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { MemberName(struct_type, 0, "data"); MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + storage_memory_u32 = TypePointer(spv::StorageClass::StorageBuffer, struct_type); storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); u32 index{}; for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { - const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; + const Id id{AddGlobalVariable(storage_memory_u32, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("ssbo{}", index)); @@ -546,6 +651,51 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { index += desc.count; binding += desc.count; } + if (info.uses_global_increment) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; + increment_cas_ssbo = CasLoop(inc_func, CasPointerType::Ssbo, U32[1]); + } + if (info.uses_global_decrement) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; + decrement_cas_ssbo = CasLoop(dec_func, CasPointerType::Ssbo, U32[1]); + } + if (info.uses_atomic_f32_add) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[1])}; + f32_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[1]); + } + if (info.uses_atomic_f16x2_add) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F16[2])}; + f16x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F16[2]); + } + if (info.uses_atomic_f16x2_min) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMin, F16[2])}; + f16x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + } + if (info.uses_atomic_f16x2_max) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMax, F16[2])}; + f16x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + } + if (info.uses_atomic_f32x2_add) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[2])}; + f32x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[2]); + } + if (info.uses_atomic_f32x2_min) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMin, F32[2])}; + f32x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + } + if (info.uses_atomic_f32x2_max) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMax, F32[2])}; + f32x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + } } void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e70f3458c..34f38454f 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -94,6 +94,7 @@ public: Id output_f32{}; Id storage_u32{}; + Id storage_memory_u32{}; Id image_buffer_type{}; Id sampled_texture_buffer_type{}; @@ -136,9 +137,21 @@ public: Id shared_memory_u32{}; Id shared_memory_u32x2{}; Id shared_memory_u32x4{}; + Id shared_memory_u32_type{}; Id shared_store_u8_func{}; Id shared_store_u16_func{}; + Id increment_cas_shared{}; + Id increment_cas_ssbo{}; + Id decrement_cas_shared{}; + Id decrement_cas_ssbo{}; + Id f32_add_cas{}; + Id f16x2_add_cas{}; + Id f16x2_min_cas{}; + Id f16x2_max_cas{}; + Id f32x2_add_cas{}; + Id f32x2_min_cas{}; + Id f32x2_max_cas{}; Id input_position{}; std::array input_generics{}; @@ -153,6 +166,11 @@ public: std::vector interfaces; private: + enum class CasPointerType { + Shared, + Ssbo, + }; + void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineInterfaces(const Info& info); @@ -171,6 +189,8 @@ private: void DefineInputs(const Info& info); void DefineOutputs(const Info& info); + + [[nodiscard]] Id CasLoop(Id function, CasPointerType pointer_type, Id value_type); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 5a1ffd61c..9248bd78b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -238,6 +238,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::SubgroupVoteKHR); } } + if (info.uses_64_bit_atomics && profile.support_int64_atomics) { + ctx.AddCapability(spv::Capability::Int64Atomics); + } if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 12b7993ae..a3398a605 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -306,6 +306,101 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitGlobalAtomicIAdd32(EmitContext& ctx); +Id EmitGlobalAtomicSMin32(EmitContext& ctx); +Id EmitGlobalAtomicUMin32(EmitContext& ctx); +Id EmitGlobalAtomicSMax32(EmitContext& ctx); +Id EmitGlobalAtomicUMax32(EmitContext& ctx); +Id EmitGlobalAtomicInc32(EmitContext& ctx); +Id EmitGlobalAtomicDec32(EmitContext& ctx); +Id EmitGlobalAtomicAnd32(EmitContext& ctx); +Id EmitGlobalAtomicOr32(EmitContext& ctx); +Id EmitGlobalAtomicXor32(EmitContext& ctx); +Id EmitGlobalAtomicExchange32(EmitContext& ctx); +Id EmitGlobalAtomicIAdd64(EmitContext& ctx); +Id EmitGlobalAtomicSMin64(EmitContext& ctx); +Id EmitGlobalAtomicUMin64(EmitContext& ctx); +Id EmitGlobalAtomicSMax64(EmitContext& ctx); +Id EmitGlobalAtomicUMax64(EmitContext& ctx); +Id EmitGlobalAtomicInc64(EmitContext& ctx); +Id EmitGlobalAtomicDec64(EmitContext& ctx); +Id EmitGlobalAtomicAnd64(EmitContext& ctx); +Id EmitGlobalAtomicOr64(EmitContext& ctx); +Id EmitGlobalAtomicXor64(EmitContext& ctx); +Id EmitGlobalAtomicExchange64(EmitContext& ctx); +Id EmitGlobalAtomicAddF32(EmitContext& ctx); +Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); +Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp new file mode 100644 index 000000000..03d891419 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -0,0 +1,528 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { +namespace { + +Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shifted_value{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id index{ctx.OpIAdd(ctx.U32[1], shifted_value, ctx.Constant(ctx.U32[1], index_offset))}; + return ctx.profile.support_explicit_workgroup_layout + ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) + : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); +} + +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { + if (offset.IsImmediate()) { + const u32 imm_offset{static_cast(offset.U32() / element_size)}; + return ctx.Constant(ctx.U32[1], imm_offset); + } + const u32 shift{static_cast(std::countr_zero(element_size))}; + const Id index{ctx.Def(offset)}; + if (shift == 0) { + return index; + } + const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); +} + +Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + u32 index_offset = 0) { + // TODO: Support reinterpreting bindings, guaranteed to be aligned + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], index_offset))}; + return ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index); +} + +std::pair GetAtomicArgs(EmitContext& ctx) { + const Id scope{ctx.Constant(ctx.U32[1], static_cast(spv::Scope::Device))}; + const Id semantics{ctx.u32_zero_value}; + return {scope, semantics}; +} + +Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) { + const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; + const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; + const Id original_composite{ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)}; + return ctx.OpBitcast(ctx.U64, original_composite); +} + +void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) { + const Id composite{ctx.OpBitcast(ctx.U32[2], result)}; + ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], composite, 0)); + ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], composite, 1)); +} +} // Anonymous namespace + +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value, + ctx.shared_memory_u32); +} + +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value, + ctx.shared_memory_u32); +} + +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer_1{GetSharedPointer(ctx, pointer_offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetSharedPointer(ctx, pointer_offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + StoreResult(ctx, pointer_1, pointer_2, value); + return original_value; +} + +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicIAdd(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpIAdd(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMin(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpSMin(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMin(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpUMin(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMax(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpSMax(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMax(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpUMax(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicAnd(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpBitwiseAnd(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicOr(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpBitwiseOr(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicXor(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpBitwiseXor(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + StoreResult(ctx, pointer_1, pointer_2, value); + return original_value; +} + +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); +} + +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 17be0c639..a3339f624 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1284,6 +1284,204 @@ U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) return Inst(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); } +U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicIAdd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicSMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicUMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? SharedAtomicSMin(pointer_offset, value) + : SharedAtomicUMin(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicSMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicUMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? SharedAtomicSMax(pointer_offset, value) + : SharedAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicDec32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicAnd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicOr32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicXor32, pointer_offset, value); +} + +U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::SharedAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::SharedAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicIAdd32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicIAdd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicSMin32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicSMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicUMin32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicUMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? GlobalAtomicSMin(pointer_offset, value) + : GlobalAtomicUMin(pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicSMax32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicSMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicUMax32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicUMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? GlobalAtomicSMax(pointer_offset, value) + : GlobalAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) { + return Inst(Opcode::GlobalAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) { + return Inst(Opcode::GlobalAtomicDec32, pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicAnd32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicAnd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicOr32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicOr64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicXor32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicXor64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value); +} + U1 IREmitter::LogicalOr(const U1& a, const U1& b) { return Inst(Opcode::LogicalOr, a, b); } @@ -1626,7 +1824,7 @@ Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInst } void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, - TextureInstInfo info) { + TextureInstInfo info) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; Inst(op, Flags{info}, handle, coords, color); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index ec60070ef..f9cbf1304 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -228,6 +228,45 @@ public: [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value); + + [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value); + + [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 2df631791..0f66c5627 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -93,6 +93,72 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteSharedU32: case Opcode::WriteSharedU64: case Opcode::WriteSharedU128: + case Opcode::SharedAtomicIAdd32: + case Opcode::SharedAtomicSMin32: + case Opcode::SharedAtomicUMin32: + case Opcode::SharedAtomicSMax32: + case Opcode::SharedAtomicUMax32: + case Opcode::SharedAtomicInc32: + case Opcode::SharedAtomicDec32: + case Opcode::SharedAtomicAnd32: + case Opcode::SharedAtomicOr32: + case Opcode::SharedAtomicXor32: + case Opcode::SharedAtomicExchange32: + case Opcode::SharedAtomicExchange64: + case Opcode::GlobalAtomicIAdd32: + case Opcode::GlobalAtomicSMin32: + case Opcode::GlobalAtomicUMin32: + case Opcode::GlobalAtomicSMax32: + case Opcode::GlobalAtomicUMax32: + case Opcode::GlobalAtomicInc32: + case Opcode::GlobalAtomicDec32: + case Opcode::GlobalAtomicAnd32: + case Opcode::GlobalAtomicOr32: + case Opcode::GlobalAtomicXor32: + case Opcode::GlobalAtomicExchange32: + case Opcode::GlobalAtomicIAdd64: + case Opcode::GlobalAtomicSMin64: + case Opcode::GlobalAtomicUMin64: + case Opcode::GlobalAtomicSMax64: + case Opcode::GlobalAtomicUMax64: + case Opcode::GlobalAtomicAnd64: + case Opcode::GlobalAtomicOr64: + case Opcode::GlobalAtomicXor64: + case Opcode::GlobalAtomicExchange64: + case Opcode::GlobalAtomicAddF32: + case Opcode::GlobalAtomicAddF16x2: + case Opcode::GlobalAtomicAddF32x2: + case Opcode::GlobalAtomicMinF16x2: + case Opcode::GlobalAtomicMinF32x2: + case Opcode::GlobalAtomicMaxF16x2: + case Opcode::GlobalAtomicMaxF32x2: + case Opcode::StorageAtomicIAdd32: + case Opcode::StorageAtomicSMin32: + case Opcode::StorageAtomicUMin32: + case Opcode::StorageAtomicSMax32: + case Opcode::StorageAtomicUMax32: + case Opcode::StorageAtomicInc32: + case Opcode::StorageAtomicDec32: + case Opcode::StorageAtomicAnd32: + case Opcode::StorageAtomicOr32: + case Opcode::StorageAtomicXor32: + case Opcode::StorageAtomicExchange32: + case Opcode::StorageAtomicIAdd64: + case Opcode::StorageAtomicSMin64: + case Opcode::StorageAtomicUMin64: + case Opcode::StorageAtomicSMax64: + case Opcode::StorageAtomicUMax64: + case Opcode::StorageAtomicAnd64: + case Opcode::StorageAtomicOr64: + case Opcode::StorageAtomicXor64: + case Opcode::StorageAtomicExchange64: + case Opcode::StorageAtomicAddF32: + case Opcode::StorageAtomicAddF16x2: + case Opcode::StorageAtomicAddF32x2: + case Opcode::StorageAtomicMinF16x2: + case Opcode::StorageAtomicMinF32x2: + case Opcode::StorageAtomicMaxF16x2: + case Opcode::StorageAtomicMaxF32x2: case Opcode::BindlessImageWrite: case Opcode::BoundImageWrite: case Opcode::ImageWrite: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 86ea02560..dc776a73e 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -321,6 +321,76 @@ OPCODE(INotEqual, U1, U32, OPCODE(SGreaterThanEqual, U1, U32, U32, ) OPCODE(UGreaterThanEqual, U1, U32, U32, ) +// Atomic operations +OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) +OPCODE(SharedAtomicSMin32, U32, U32, U32, ) +OPCODE(SharedAtomicUMin32, U32, U32, U32, ) +OPCODE(SharedAtomicSMax32, U32, U32, U32, ) +OPCODE(SharedAtomicUMax32, U32, U32, U32, ) +OPCODE(SharedAtomicInc32, U32, U32, U32, ) +OPCODE(SharedAtomicDec32, U32, U32, U32, ) +OPCODE(SharedAtomicAnd32, U32, U32, U32, ) +OPCODE(SharedAtomicOr32, U32, U32, U32, ) +OPCODE(SharedAtomicXor32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange64, U64, U32, U64, ) + +OPCODE(GlobalAtomicIAdd32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicInc32, U32, U64, U32, ) +OPCODE(GlobalAtomicDec32, U32, U64, U32, ) +OPCODE(GlobalAtomicAnd32, U32, U64, U32, ) +OPCODE(GlobalAtomicOr32, U32, U64, U32, ) +OPCODE(GlobalAtomicXor32, U32, U64, U32, ) +OPCODE(GlobalAtomicExchange32, U32, U64, U32, ) +OPCODE(GlobalAtomicIAdd64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicAnd64, U64, U64, U64, ) +OPCODE(GlobalAtomicOr64, U64, U64, U64, ) +OPCODE(GlobalAtomicXor64, U64, U64, U64, ) +OPCODE(GlobalAtomicExchange64, U64, U64, U64, ) +OPCODE(GlobalAtomicAddF32, F32, U64, F32, ) +OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, ) + +OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicInc32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicDec32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicOr32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicXor32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicOr64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicXor64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, ) +OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, ) + // Logical operations OPCODE(LogicalOr, U1, U1, U1, ) OPCODE(LogicalAnd, U1, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..7a32c5eb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -0,0 +1,222 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, + SAFEADD, +}; + +enum class AtomSize : u64 { + U32, + S32, + U64, + F32, + F16x2, + S64, +}; + +IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, + AtomOp op, bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.GlobalAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.GlobalAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.GlobalAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.GlobalAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.GlobalAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.GlobalAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.GlobalAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.GlobalAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.GlobalAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atom Operation {}", op); + } +} + +IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, + AtomSize size) { + static constexpr IR::FpControl f16_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::DontCare}, + }; + static constexpr IR::FpControl f32_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::FTZ}, + }; + switch (op) { + case AtomOp::ADD: + return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) + : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); + case AtomOp::MIN: + return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); + case AtomOp::MAX: + return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); + default: + throw NotImplementedException("FP Atom Operation {}", op); + } +} + +IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<28, 20, s64> addr_offset; + BitField<28, 20, u64> rz_addr_offset; + BitField<48, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + return v.L(mem.addr_reg); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast(mem.rz_addr_offset.Value()); + } else { + return static_cast(mem.addr_offset.Value()); + } + }()}; + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} + +bool AtomOpNotApplicable(AtomSize size, AtomOp op) { + // TODO: SAFEADD + switch (size) { + case AtomSize::S32: + case AtomSize::U64: + return (op == AtomOp::INC || op == AtomOp::DEC); + case AtomSize::S64: + return !(op == AtomOp::MIN || op == AtomOp::MAX); + case AtomSize::F32: + return op != AtomOp::ADD; + case AtomSize::F16x2: + return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); + default: + return false; + } +} + +IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F32: + case AtomSize::F16x2: + return ir.LoadGlobal32(offset); + case AtomSize::U64: + case AtomSize::S64: + return ir.PackUint2x32(ir.LoadGlobal64(offset)); + default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F16x2: + return v.X(dest_reg, IR::U32{result}); + case AtomSize::U64: + case AtomSize::S64: + return v.L(dest_reg, IR::U64{result}); + case AtomSize::F32: + return v.F(dest_reg, IR::F32{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOM(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<49, 3, AtomSize> size; + BitField<52, 4, AtomOp> op; + } const atom{insn}; + + const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64}; + const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64}; + const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2}; + const IR::U64 offset{AtomOffset(*this, insn)}; + IR::Value result; + + if (AtomOpNotApplicable(atom.size, atom.op)) { + result = LoadGlobal(ir, offset, atom.size); + } else if (!is_integer) { + if (atom.size == AtomSize::F32) { + result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size); + } else { + const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))}; + result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size); + } + } else if (size_64) { + result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed); + } else { + result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed); + } + StoreResult(*this, atom.dest_reg, result, atom.size); +} + +void TranslatorVisitor::RED(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg_b; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 3, AtomSize> size; + BitField<23, 3, AtomOp> op; + } const red{insn}; + + if (AtomOpNotApplicable(red.size, red.op)) { + return; + } + const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64}; + const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64}; + const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2}; + const IR::U64 offset{AtomOffset(*this, insn)}; + if (!is_integer) { + if (red.size == AtomSize::F32) { + ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size); + } else { + const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))}; + ApplyFpAtomOp(ir, offset, src_b, red.op, red.size); + } + } else if (size_64) { + ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed); + } else { + ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class AtomsSize : u64 { + U32, + S32, + U64, +}; + +IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, + bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.SharedAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.SharedAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.SharedAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.SharedAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.SharedAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.SharedAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.SharedAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.SharedAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.SharedAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atoms Operation {}", op); + } +} + +IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<30, 22, u64> absolute_offset; + BitField<30, 22, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast(encoding.absolute_offset << 2)); + } else { + const s32 relative{static_cast(encoding.relative_offset << 2)}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { + switch (size) { + case AtomsSize::U32: + case AtomsSize::S32: + return v.X(dest_reg, IR::U32{result}); + case AtomsSize::U64: + return v.L(dest_reg, IR::U64{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOMS(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<28, 2, AtomsSize> size; + BitField<52, 4, AtomOp> op; + } const atoms{insn}; + + const bool size_64{atoms.size == AtomsSize::U64}; + if (size_64 && atoms.op != AtomOp::EXCH) { + throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); + } + const bool is_signed{atoms.size == AtomsSize::S32}; + const IR::U32 offset{AtomsOffset(*this, insn)}; + + IR::Value result; + if (size_64) { + result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); + } else { + result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); + } + StoreResult(*this, atoms.dest_reg, result, atoms.size); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 327941223..aebe3072a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } -void TranslatorVisitor::ATOM(u64) { - ThrowNotImplemented(Opcode::ATOM); -} - void TranslatorVisitor::ATOMS_cas(u64) { ThrowNotImplemented(Opcode::ATOMS_cas); } -void TranslatorVisitor::ATOMS(u64) { - ThrowNotImplemented(Opcode::ATOMS); -} - void TranslatorVisitor::B2R(u64) { ThrowNotImplemented(Opcode::B2R); } @@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) { ThrowNotImplemented(Opcode::RAM); } -void TranslatorVisitor::RED(u64) { - ThrowNotImplemented(Opcode::RED); -} - void TranslatorVisitor::RET(u64) { ThrowNotImplemented(Opcode::RET); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 9ef8688c9..73373576b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPOrdGreaterThanEqual16: case IR::Opcode::FPUnordGreaterThanEqual16: case IR::Opcode::FPIsNan16: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::StorageAtomicAddF16x2: + case IR::Opcode::StorageAtomicMinF16x2: + case IR::Opcode::StorageAtomicMaxF16x2: info.uses_fp16 = true; break; case IR::Opcode::CompositeConstructF64x2: @@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertF16U64: case IR::Opcode::ConvertF32U64: case IR::Opcode::ConvertF64U64: + case IR::Opcode::SharedAtomicExchange64: info.uses_int64 = true; break; default: @@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FSwizzleAdd: info.uses_fswzadd = true; break; + case IR::Opcode::SharedAtomicInc32: + info.uses_shared_increment = true; + break; + case IR::Opcode::SharedAtomicDec32: + info.uses_shared_decrement = true; + break; + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::StorageAtomicInc32: + info.uses_global_increment = true; + break; + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::StorageAtomicDec32: + info.uses_global_decrement = true; + break; + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::StorageAtomicAddF32: + info.uses_atomic_f32_add = true; + break; + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::StorageAtomicAddF16x2: + info.uses_atomic_f16x2_add = true; + break; + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::StorageAtomicAddF32x2: + info.uses_atomic_f32x2_add = true; + break; + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::StorageAtomicMinF16x2: + info.uses_atomic_f16x2_min = true; + break; + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::StorageAtomicMinF32x2: + info.uses_atomic_f32x2_min = true; + break; + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::StorageAtomicMaxF16x2: + info.uses_atomic_f16x2_max = true; + break; + case IR::Opcode::GlobalAtomicMaxF32x2: + case IR::Opcode::StorageAtomicMaxF32x2: + info.uses_atomic_f32x2_max = true; + break; + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::StorageAtomicIAdd64: + case IR::Opcode::StorageAtomicSMin64: + case IR::Opcode::StorageAtomicUMin64: + case IR::Opcode::StorageAtomicSMax64: + case IR::Opcode::StorageAtomicUMax64: + case IR::Opcode::StorageAtomicAnd64: + case IR::Opcode::StorageAtomicOr64: + case IR::Opcode::StorageAtomicXor64: + info.uses_64_bit_atomics = true; + break; + case IR::Opcode::SharedAtomicExchange64: + info.uses_64_bit_atomics = true; + info.uses_shared_memory_u32x2 = true; + break; default: break; } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index afe871505..0d4f266c3 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) { case IR::Opcode::WriteGlobal32: case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: return true; default: return false; @@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { return IR::Opcode::WriteStorage64; case IR::Opcode::WriteGlobal128: return IR::Opcode::WriteStorage128; + case IR::Opcode::GlobalAtomicIAdd32: + return IR::Opcode::StorageAtomicIAdd32; + case IR::Opcode::GlobalAtomicSMin32: + return IR::Opcode::StorageAtomicSMin32; + case IR::Opcode::GlobalAtomicUMin32: + return IR::Opcode::StorageAtomicUMin32; + case IR::Opcode::GlobalAtomicSMax32: + return IR::Opcode::StorageAtomicSMax32; + case IR::Opcode::GlobalAtomicUMax32: + return IR::Opcode::StorageAtomicUMax32; + case IR::Opcode::GlobalAtomicInc32: + return IR::Opcode::StorageAtomicInc32; + case IR::Opcode::GlobalAtomicDec32: + return IR::Opcode::StorageAtomicDec32; + case IR::Opcode::GlobalAtomicAnd32: + return IR::Opcode::StorageAtomicAnd32; + case IR::Opcode::GlobalAtomicOr32: + return IR::Opcode::StorageAtomicOr32; + case IR::Opcode::GlobalAtomicXor32: + return IR::Opcode::StorageAtomicXor32; + case IR::Opcode::GlobalAtomicIAdd64: + return IR::Opcode::StorageAtomicIAdd64; + case IR::Opcode::GlobalAtomicSMin64: + return IR::Opcode::StorageAtomicSMin64; + case IR::Opcode::GlobalAtomicUMin64: + return IR::Opcode::StorageAtomicUMin64; + case IR::Opcode::GlobalAtomicSMax64: + return IR::Opcode::StorageAtomicSMax64; + case IR::Opcode::GlobalAtomicUMax64: + return IR::Opcode::StorageAtomicUMax64; + case IR::Opcode::GlobalAtomicAnd64: + return IR::Opcode::StorageAtomicAnd64; + case IR::Opcode::GlobalAtomicOr64: + return IR::Opcode::StorageAtomicOr64; + case IR::Opcode::GlobalAtomicXor64: + return IR::Opcode::StorageAtomicXor64; + case IR::Opcode::GlobalAtomicExchange32: + return IR::Opcode::StorageAtomicExchange32; + case IR::Opcode::GlobalAtomicExchange64: + return IR::Opcode::StorageAtomicExchange64; + case IR::Opcode::GlobalAtomicAddF32: + return IR::Opcode::StorageAtomicAddF32; + case IR::Opcode::GlobalAtomicAddF16x2: + return IR::Opcode::StorageAtomicAddF16x2; + case IR::Opcode::GlobalAtomicMinF16x2: + return IR::Opcode::StorageAtomicMinF16x2; + case IR::Opcode::GlobalAtomicMaxF16x2: + return IR::Opcode::StorageAtomicMaxF16x2; + case IR::Opcode::GlobalAtomicAddF32x2: + return IR::Opcode::StorageAtomicAddF32x2; + case IR::Opcode::GlobalAtomicMinF32x2: + return IR::Opcode::StorageAtomicMinF32x2; + case IR::Opcode::GlobalAtomicMaxF32x2: + return IR::Opcode::StorageAtomicMaxF32x2; default: throw InvalidArgument("Invalid global memory opcode {}", opcode); } @@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index inst.Invalidate(); } +/// Replace an atomic operation on global memory instruction with its storage buffer equivalent +void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, + const IR::U32& offset) { + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::Value value{ + &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})}; + inst.ReplaceUsesWith(value); +} + /// Replace a global memory instruction with its storage buffer equivalent void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { @@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: + return ReplaceAtomic(block, inst, storage_index, offset); default: throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); } @@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { CollectStorageBuffers(*block, inst, info); } } - u32 storage_index{}; for (const StorageBufferAddr& storage_buffer : info.set) { program.info.storage_buffers_descriptors.push_back({ .cbuf_index = storage_buffer.index, @@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .count = 1, .is_written{info.writes.contains(storage_buffer)}, }); - ++storage_index; } for (const StorageInst& storage_inst : info.to_replace) { const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 52576b07f..62e73d52d 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::ConvertF32U32; case IR::Opcode::ConvertF16U64: return IR::Opcode::ConvertF32U64; + case IR::Opcode::GlobalAtomicAddF16x2: + return IR::Opcode::GlobalAtomicAddF32x2; + case IR::Opcode::StorageAtomicAddF16x2: + return IR::Opcode::StorageAtomicAddF32x2; + case IR::Opcode::GlobalAtomicMinF16x2: + return IR::Opcode::GlobalAtomicMinF32x2; + case IR::Opcode::StorageAtomicMinF16x2: + return IR::Opcode::StorageAtomicMinF32x2; + case IR::Opcode::GlobalAtomicMaxF16x2: + return IR::Opcode::GlobalAtomicMaxF32x2; + case IR::Opcode::StorageAtomicMaxF16x2: + return IR::Opcode::StorageAtomicMaxF32x2; default: return op; } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f0d68d516..a4e41bda1 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -38,6 +38,7 @@ struct Profile { bool support_viewport_index_layer_non_geometry{}; bool support_typeless_image_loads{}; bool warp_size_potentially_larger_than_guest{}; + bool support_int64_atomics{}; // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 3fbe99268..7bcecf554 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -128,6 +128,19 @@ struct Info { bool uses_subgroup_mask{}; bool uses_fswzadd{}; bool uses_typeless_image_reads{}; + bool uses_shared_increment{}; + bool uses_shared_decrement{}; + bool uses_global_increment{}; + bool uses_global_decrement{}; + bool uses_atomic_f32_add{}; + bool uses_atomic_f16x2_add{}; + bool uses_atomic_f16x2_min{}; + bool uses_atomic_f16x2_max{}; + bool uses_atomic_f32x2_add{}; + bool uses_atomic_f32x2_min{}; + bool uses_atomic_f32x2_max{}; + bool uses_64_bit_atomics{}; + bool uses_shared_memory_u32x2{}; IR::Type used_constant_buffer_types{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f699a9bdf..b953d694b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -637,6 +637,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, device.IsExtShaderViewportIndexLayerSupported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), + .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, .fixed_state_point_size{}, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 78bb741bc..911dfed44 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -681,6 +681,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; + bool has_ext_shader_atomic_int64{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -710,6 +711,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { @@ -760,6 +762,18 @@ std::vector Device::LoadExtensions(bool requires_surface) { } else { is_warp_potentially_bigger = true; } + if (has_ext_shader_atomic_int64) { + VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; + atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + atomic_int64.pNext = nullptr; + features.pNext = &atomic_int64; + physical.GetFeatures2KHR(features); + + if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) { + extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); + ext_shader_atomic_int64 = true; + } + } if (has_ext_transform_feedback) { VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index adf62a707..4e6d13308 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -229,6 +229,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_KHR_shader_atomic_int64. + bool IsExtShaderAtomicInt64Supported() const { + return ext_shader_atomic_int64; + } + /// Returns true when a known debugging tool is attached. bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -320,6 +325,7 @@ private: bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. + bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached -- cgit v1.2.3 From fa75b9b0626c8e118e27207dd1e82e2f415fc0bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 13 Apr 2021 05:32:21 -0300 Subject: spirv: Rework storage buffers and shader memory --- .../backend/spirv/emit_context.cpp | 440 ++++++++++++--------- src/shader_recompiler/backend/spirv/emit_context.h | 49 ++- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 20 +- .../backend/spirv/emit_spirv_atomic.cpp | 333 ++++++---------- .../backend/spirv/emit_spirv_memory.cpp | 135 +++---- .../ir_opt/collect_shader_info_pass.cpp | 69 +++- src/shader_recompiler/shader_info.h | 4 +- src/video_core/vulkan_common/vulkan_device.cpp | 29 +- 9 files changed, 581 insertions(+), 500 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 01b77a7d1..df53e58a8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,7 +15,7 @@ namespace Shader::Backend::SPIRV { namespace { -enum class CasFunctionType { +enum class Operation { Increment, Decrement, FPAdd, @@ -23,44 +23,11 @@ enum class CasFunctionType { FPMax, }; -Id CasFunction(EmitContext& ctx, CasFunctionType function_type, Id value_type) { - const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; - const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; - const Id op_a{ctx.OpFunctionParameter(value_type)}; - const Id op_b{ctx.OpFunctionParameter(value_type)}; - ctx.AddLabel(); - Id result{}; - switch (function_type) { - case CasFunctionType::Increment: { - const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; - const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; - result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); - break; - } - case CasFunctionType::Decrement: { - const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; - const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; - const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; - const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; - result = ctx.OpSelect(value_type, pred, op_b, decr); - break; - } - case CasFunctionType::FPAdd: - result = ctx.OpFAdd(value_type, op_a, op_b); - break; - case CasFunctionType::FPMin: - result = ctx.OpFMin(value_type, op_a, op_b); - break; - case CasFunctionType::FPMax: - result = ctx.OpFMax(value_type, op_a, op_b); - break; - default: - break; - } - ctx.OpReturnValue(result); - ctx.OpFunctionEnd(); - return func; -} +struct AttrInfo { + Id pointer; + Id id; + bool needs_cast; +}; Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; @@ -182,12 +149,6 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) { throw InvalidArgument("Invalid attribute type {}", type); } -struct AttrInfo { - Id pointer; - Id id; - bool needs_cast; -}; - std::optional AttrTypes(EmitContext& ctx, u32 index) { const AttributeType type{ctx.profile.generic_input_types.at(index)}; switch (type) { @@ -203,6 +164,164 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { throw InvalidArgument("Invalid attribute type {}", type); } +void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, + u32 binding, Id type, char type_char, u32 element_size) { + const Id array_type{ctx.TypeArray(type, ctx.Constant(ctx.U32[1], 65536U / element_size))}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{ctx.TypeStruct(array_type)}; + ctx.Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberName(struct_type, 0, "data"); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)}; + const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)}; + ctx.uniform_types.*member_type = uniform_type; + + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("c{}", desc.index)); + for (size_t i = 0; i < desc.count; ++i) { + ctx.cbufs[desc.index + i].*member_type = id; + } + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + binding += desc.count; + } +} + +void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type, + u32 stride) { + const Id array_type{ctx.TypeRuntimeArray(type)}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride); + + const Id struct_type{ctx.TypeStruct(array_type)}; + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + type_def.array = struct_pointer; + type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type); + + u32 index{}; + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("ssbo{}", index)); + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + for (size_t i = 0; i < desc.count; ++i) { + ctx.ssbos[index + i].*member_type = id; + } + index += desc.count; + binding += desc.count; + } +} + +Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) { + const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id op_a{ctx.OpFunctionParameter(value_type)}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + ctx.AddLabel(); + Id result{}; + switch (operation) { + case Operation::Increment: { + const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; + const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); + break; + } + case Operation::Decrement: { + const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; + const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; + const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; + const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, op_b, decr); + break; + } + case Operation::FPAdd: + result = ctx.OpFAdd(value_type, op_a, op_b); + break; + case Operation::FPMin: + result = ctx.OpFMin(value_type, op_a, op_b); + break; + case Operation::FPMax: + result = ctx.OpFMax(value_type, op_a, op_b); + break; + default: + break; + } + ctx.OpReturnValue(result); + ctx.OpFunctionEnd(); + return func; +} + +Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer, + Id value_type, Id memory_type, spv::Scope scope) { + const bool is_shared{scope == spv::Scope::Workgroup}; + const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout}; + const Id cas_func{CasFunction(ctx, operation, value_type)}; + const Id zero{ctx.u32_zero_value}; + const Id scope_id{ctx.Constant(ctx.U32[1], static_cast(scope))}; + + const Id loop_header{ctx.OpLabel()}; + const Id continue_block{ctx.OpLabel()}; + const Id merge_block{ctx.OpLabel()}; + const Id func_type{is_shared + ? ctx.TypeFunction(value_type, ctx.U32[1], value_type) + : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)}; + + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id index{ctx.OpFunctionParameter(ctx.U32[1])}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)}; + ctx.AddLabel(); + ctx.OpBranch(loop_header); + ctx.AddLabel(loop_header); + + ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + ctx.OpBranch(continue_block); + + ctx.AddLabel(continue_block); + const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index) + : ctx.OpAccessChain(element_pointer, base, index)}; + if (value_type.value == ctx.F32[2].value) { + const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)}; + const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)}; + const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)}; + const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, u32_new_value, u32_value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res)); + } else { + const Id value{ctx.OpLoad(memory_type, word_pointer)}; + const bool matching_type{value_type.value == memory_type.value}; + const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)}; + const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)}; + const Id new_value{matching_type ? cal_res : ctx.OpBitcast(memory_type, cal_res)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, new_value, value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res)); + } + ctx.OpFunctionEnd(); + return func; +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -226,6 +345,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineInterfaces(program.info); DefineLocalMemory(program); DefineSharedMemory(program); + DefineSharedMemoryFunctions(program); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextureBuffers(program.info, binding); @@ -263,56 +383,6 @@ Id EmitContext::Def(const IR::Value& value) { } } -Id EmitContext::CasLoop(Id function, CasPointerType pointer_type, Id value_type) { - const Id loop_header{OpLabel()}; - const Id continue_block{OpLabel()}; - const Id merge_block{OpLabel()}; - const Id storage_type{pointer_type == CasPointerType::Shared ? shared_memory_u32_type - : storage_memory_u32}; - const Id func_type{TypeFunction(value_type, U32[1], value_type, storage_type)}; - const Id func{OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; - const Id index{OpFunctionParameter(U32[1])}; - const Id op_b{OpFunctionParameter(value_type)}; - const Id base{OpFunctionParameter(storage_type)}; - AddLabel(); - const Id one{Constant(U32[1], 1)}; - OpBranch(loop_header); - AddLabel(loop_header); - OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); - OpBranch(continue_block); - - AddLabel(continue_block); - const Id word_pointer{pointer_type == CasPointerType::Shared - ? OpAccessChain(shared_u32, base, index) - : OpAccessChain(storage_u32, base, u32_zero_value, index)}; - if (value_type.value == F32[2].value) { - const Id u32_value{OpLoad(U32[1], word_pointer)}; - const Id value{OpUnpackHalf2x16(F32[2], u32_value)}; - const Id new_value{OpFunctionCall(value_type, function, value, op_b)}; - const Id u32_new_value{OpPackHalf2x16(U32[1], new_value)}; - const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, - u32_zero_value, u32_new_value, u32_value)}; - const Id success{OpIEqual(U1, atomic_res, u32_value)}; - OpBranchConditional(success, merge_block, loop_header); - - AddLabel(merge_block); - OpReturnValue(OpUnpackHalf2x16(F32[2], atomic_res)); - } else { - const Id value{OpLoad(U32[1], word_pointer)}; - const Id new_value{OpBitcast( - U32[1], OpFunctionCall(value_type, function, OpBitcast(value_type, value), op_b))}; - const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, - u32_zero_value, new_value, value)}; - const Id success{OpIEqual(U1, atomic_res, value)}; - OpBranchConditional(success, merge_block, loop_header); - - AddLabel(merge_block); - OpReturnValue(OpBitcast(value_type, atomic_res)); - } - OpFunctionEnd(); - return func; -} - void EmitContext::DefineCommonTypes(const Info& info) { void_id = TypeVoid(); @@ -397,27 +467,31 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { Decorate(variable, spv::Decoration::Aliased); interfaces.push_back(variable); - return std::make_pair(variable, element_pointer); + return std::make_tuple(variable, element_pointer, pointer); }}; if (profile.support_explicit_workgroup_layout) { AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); if (program.info.uses_int8) { AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); - std::tie(shared_memory_u8, shared_u8) = make(U8, 1); + std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); } if (program.info.uses_int16) { AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); - std::tie(shared_memory_u16, shared_u16) = make(U16, 2); + std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); + } + if (program.info.uses_int64) { + std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8); } - std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4); - std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8); - std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16); + std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); + std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); + std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); return; } const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); + shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); interfaces.push_back(shared_memory_u32); @@ -463,13 +537,16 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { if (program.info.uses_int16) { shared_store_u16_func = make_function(16, 16); } +} + +void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) { if (program.info.uses_shared_increment) { - const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; - increment_cas_shared = CasLoop(inc_func, CasPointerType::Shared, U32[1]); + increment_cas_shared = CasLoop(*this, Operation::Increment, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); } if (program.info.uses_shared_decrement) { - const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; - decrement_cas_shared = CasLoop(dec_func, CasPointerType::Shared, U32[1]); + decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); } } @@ -628,21 +705,24 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { return; } if (True(info.used_constant_buffer_types & IR::Type::U8)) { - DefineConstantBuffers(info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); - DefineConstantBuffers(info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); + DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); + DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); } if (True(info.used_constant_buffer_types & IR::Type::U16)) { - DefineConstantBuffers(info, &UniformDefinitions::U16, binding, U16, 'u', sizeof(u16)); - DefineConstantBuffers(info, &UniformDefinitions::S16, binding, S16, 's', sizeof(s16)); + DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', sizeof(u16)); + DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', sizeof(s16)); } if (True(info.used_constant_buffer_types & IR::Type::U32)) { - DefineConstantBuffers(info, &UniformDefinitions::U32, binding, U32[1], 'u', sizeof(u32)); + DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', + sizeof(u32)); } if (True(info.used_constant_buffer_types & IR::Type::F32)) { - DefineConstantBuffers(info, &UniformDefinitions::F32, binding, F32[1], 'f', sizeof(f32)); + DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', + sizeof(f32)); } if (True(info.used_constant_buffer_types & IR::Type::U32x2)) { - DefineConstantBuffers(info, &UniformDefinitions::U32x2, binding, U32[2], 'u', sizeof(u64)); + DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', + sizeof(u32[2])); } for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { binding += desc.count; @@ -655,75 +735,83 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { } AddExtension("SPV_KHR_storage_buffer_storage_class"); - const Id array_type{TypeRuntimeArray(U32[1])}; - Decorate(array_type, spv::Decoration::ArrayStride, 4U); - - const Id struct_type{TypeStruct(array_type)}; - Name(struct_type, "ssbo_block"); - Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, 0, "data"); - MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - - storage_memory_u32 = TypePointer(spv::StorageClass::StorageBuffer, struct_type); - storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); - - u32 index{}; + if (True(info.used_storage_buffer_types & IR::Type::U8)) { + DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8, + sizeof(u8)); + DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8, + sizeof(u8)); + } + if (True(info.used_storage_buffer_types & IR::Type::U16)) { + DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16, + sizeof(u16)); + DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16, + sizeof(u16)); + } + if (True(info.used_storage_buffer_types & IR::Type::U32)) { + DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1], + sizeof(u32)); + } + if (True(info.used_storage_buffer_types & IR::Type::F32)) { + DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1], + sizeof(f32)); + } + if (True(info.used_storage_buffer_types & IR::Type::U64)) { + DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64, + sizeof(u64)); + } + if (True(info.used_storage_buffer_types & IR::Type::U32x2)) { + DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2], + sizeof(u32[2])); + } + if (True(info.used_storage_buffer_types & IR::Type::U32x4)) { + DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4], + sizeof(u32[4])); + } for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { - const Id id{AddGlobalVariable(storage_memory_u32, spv::StorageClass::StorageBuffer)}; - Decorate(id, spv::Decoration::Binding, binding); - Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("ssbo{}", index)); - if (profile.supported_spirv >= 0x00010400) { - interfaces.push_back(id); - } - std::fill_n(ssbos.data() + index, desc.count, id); - index += desc.count; binding += desc.count; } - if (info.uses_global_increment) { + const bool needs_function{ + info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add || + info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max || + info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max}; + if (needs_function) { AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; - increment_cas_ssbo = CasLoop(inc_func, CasPointerType::Ssbo, U32[1]); + } + if (info.uses_global_increment) { + increment_cas_ssbo = CasLoop(*this, Operation::Increment, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); } if (info.uses_global_decrement) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; - decrement_cas_ssbo = CasLoop(dec_func, CasPointerType::Ssbo, U32[1]); + decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); } if (info.uses_atomic_f32_add) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[1])}; - f32_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[1]); + f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[1], U32[1], spv::Scope::Device); } if (info.uses_atomic_f16x2_add) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F16[2])}; - f16x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F16[2]); + f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); } if (info.uses_atomic_f16x2_min) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMin, F16[2])}; - f16x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); } if (info.uses_atomic_f16x2_max) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMax, F16[2])}; - f16x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); } if (info.uses_atomic_f32x2_add) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[2])}; - f32x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[2]); + f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); } if (info.uses_atomic_f32x2_min) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMin, F32[2])}; - f32x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); } if (info.uses_atomic_f32x2_max) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMax, F32[2])}; - f32x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); } } @@ -903,36 +991,6 @@ void EmitContext::DefineInputs(const Info& info) { } } -void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, - u32 binding, Id type, char type_char, u32 element_size) { - const Id array_type{TypeArray(type, Constant(U32[1], 65536U / element_size))}; - Decorate(array_type, spv::Decoration::ArrayStride, element_size); - - const Id struct_type{TypeStruct(array_type)}; - Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); - Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, 0, "data"); - MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - - const Id struct_pointer_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; - const Id uniform_type{TypePointer(spv::StorageClass::Uniform, type)}; - uniform_types.*member_type = uniform_type; - - for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { - const Id id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; - Decorate(id, spv::Decoration::Binding, binding); - Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("c{}", desc.index)); - for (size_t i = 0; i < desc.count; ++i) { - cbufs[desc.index + i].*member_type = id; - } - if (profile.supported_spirv >= 0x00010400) { - interfaces.push_back(id); - } - binding += desc.count; - } -} - void EmitContext::DefineOutputs(const Info& info) { if (info.stores_position || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 98a9140bf..cade1fa0d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -50,6 +50,35 @@ struct UniformDefinitions { Id U32x2{}; }; +struct StorageTypeDefinition { + Id array{}; + Id element{}; +}; + +struct StorageTypeDefinitions { + StorageTypeDefinition U8{}; + StorageTypeDefinition S8{}; + StorageTypeDefinition U16{}; + StorageTypeDefinition S16{}; + StorageTypeDefinition U32{}; + StorageTypeDefinition U64{}; + StorageTypeDefinition F32{}; + StorageTypeDefinition U32x2{}; + StorageTypeDefinition U32x4{}; +}; + +struct StorageDefinitions { + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U32{}; + Id F32{}; + Id U64{}; + Id U32x2{}; + Id U32x4{}; +}; + class EmitContext final : public Sirit::Module { public: explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); @@ -78,12 +107,14 @@ public: Id f32_zero_value{}; UniformDefinitions uniform_types; + StorageTypeDefinitions storage_types; Id private_u32{}; Id shared_u8{}; Id shared_u16{}; Id shared_u32{}; + Id shared_u64{}; Id shared_u32x2{}; Id shared_u32x4{}; @@ -93,14 +124,11 @@ public: Id output_f32{}; - Id storage_u32{}; - Id storage_memory_u32{}; - Id image_buffer_type{}; Id sampled_texture_buffer_type{}; std::array cbufs{}; - std::array ssbos{}; + std::array ssbos{}; std::vector texture_buffers; std::vector textures; std::vector images; @@ -136,8 +164,10 @@ public: Id shared_memory_u8{}; Id shared_memory_u16{}; Id shared_memory_u32{}; + Id shared_memory_u64{}; Id shared_memory_u32x2{}; Id shared_memory_u32x4{}; + Id shared_memory_u32_type{}; Id shared_store_u8_func{}; @@ -167,16 +197,12 @@ public: std::vector interfaces; private: - enum class CasPointerType { - Shared, - Ssbo, - }; - void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineInterfaces(const Info& info); void DefineLocalMemory(const IR::Program& program); void DefineSharedMemory(const IR::Program& program); + void DefineSharedMemoryFunctions(const IR::Program& program); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextureBuffers(const Info& info, u32& binding); @@ -185,13 +211,8 @@ private: void DefineAttributeMemAccess(const Info& info); void DefineLabels(IR::Program& program); - void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, - Id type, char type_char, u32 element_size); - void DefineInputs(const Info& info); void DefineOutputs(const Info& info); - - [[nodiscard]] Id CasLoop(Id function, CasPointerType pointer_type, Id value_type); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index d7c5890ab..61a2018d7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -276,7 +276,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::SubgroupVoteKHR); } } - if (info.uses_64_bit_atomics && profile.support_int64_atomics) { + if (info.uses_int64_bit_atomics && profile.support_int64_atomics) { ctx.AddCapability(spv::Capability::Int64Atomics); } if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index c0e1b8833..55b2edba0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -89,17 +89,21 @@ void EmitWriteGlobalS16(EmitContext& ctx); void EmitWriteGlobal32(EmitContext& ctx); void EmitWriteGlobal64(EmitContext& ctx); void EmitWriteGlobal128(EmitContext& ctx); -void EmitLoadStorageU8(EmitContext& ctx); -void EmitLoadStorageS8(EmitContext& ctx); -void EmitLoadStorageU16(EmitContext& ctx); -void EmitLoadStorageS16(EmitContext& ctx); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitWriteStorageU8(EmitContext& ctx); -void EmitWriteStorageS8(EmitContext& ctx); -void EmitWriteStorageU16(EmitContext& ctx); -void EmitWriteStorageS16(EmitContext& ctx); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 03d891419..aab32dc52 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -6,11 +6,12 @@ namespace Shader::Backend::SPIRV { namespace { - -Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { +Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; - const Id shifted_value{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - const Id index{ctx.OpIAdd(ctx.U32[1], shifted_value, ctx.Constant(ctx.U32[1], index_offset))}; + Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + if (index_offset > 0) { + index = ctx.OpIAdd(ctx.U32[1], index, ctx.Constant(ctx.U32[1], index_offset)); + } return ctx.profile.support_explicit_workgroup_layout ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); @@ -30,340 +31,258 @@ Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } -Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - u32 index_offset = 0) { - // TODO: Support reinterpreting bindings, guaranteed to be aligned +Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_ptr, const IR::Value& binding, + const IR::Value& offset, size_t element_size) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; - const Id index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], index_offset))}; - return ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index); + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); } -std::pair GetAtomicArgs(EmitContext& ctx) { +std::pair AtomicArgs(EmitContext& ctx) { const Id scope{ctx.Constant(ctx.U32[1], static_cast(spv::Scope::Device))}; const Id semantics{ctx.u32_zero_value}; return {scope, semantics}; } -Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) { - const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; - const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; - const Id original_composite{ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)}; - return ctx.OpBitcast(ctx.U64, original_composite); +Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{SharedPointer(ctx, offset)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); } -void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) { - const Id composite{ctx.OpBitcast(ctx.U32[2], result)}; - ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], composite, 0)); - ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], composite, 1)); +Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding, + offset, sizeof(u32))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + +Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id), + Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { + if (ctx.profile.support_int64_atomics) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value); + } + // LOG_WARNING(..., "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; + const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)}; + ctx.OpStore(pointer, result); + return original_value; } } // Anonymous namespace -Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd); } -Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); } -Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin); } -Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax); } -Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); } -Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) { +Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) { const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; - const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; - return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value, - ctx.shared_memory_u32); + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value); } -Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) { +Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) { const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; - const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; - return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value, - ctx.shared_memory_u32); + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value); } -Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd); } -Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr); } -Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); } -Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange); } -Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer_1{GetSharedPointer(ctx, pointer_offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) { + const Id shift_id{ctx.Constant(ctx.U32[1], 3U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetSharedPointer(ctx, pointer_offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - StoreResult(ctx, pointer_1, pointer_2, value); - return original_value; + // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_1{SharedPointer(ctx, offset, 0)}; + const Id pointer_2{SharedPointer(ctx, offset, 1)}; + const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; + const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; + const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)}; + ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U)); + ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U)); + return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)); } Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd); } Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin); } Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin); } Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax); } Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax); } Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); } Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); } Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd); } Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr); } Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor); } Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange); } Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicIAdd(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpIAdd(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd, + &Sirit::Module::OpIAdd); } Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMin(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpSMin(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin, + &Sirit::Module::OpSMin); } Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMin(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpUMin(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin, + &Sirit::Module::OpUMin); } Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMax(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpSMax(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax, + &Sirit::Module::OpSMax); } Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMax(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpUMax(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax, + &Sirit::Module::OpUMax); } Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicAnd(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpBitwiseAnd(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd, + &Sirit::Module::OpBitwiseAnd); } Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicOr(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpBitwiseOr(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr, + &Sirit::Module::OpBitwiseOr); } Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicXor(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpBitwiseXor(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor, + &Sirit::Module::OpBitwiseXor); } Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - StoreResult(ctx, pointer_1, pointer_2, value); - return original_value; + // LOG_WARNING(..., "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; + ctx.OpStore(pointer, value); + return original; } Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); } Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; return ctx.OpBitcast(ctx.U32[1], result); @@ -371,7 +290,7 @@ Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; return ctx.OpPackHalf2x16(ctx.U32[1], result); @@ -379,7 +298,7 @@ Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; return ctx.OpBitcast(ctx.U32[1], result); @@ -387,7 +306,7 @@ Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; return ctx.OpPackHalf2x16(ctx.U32[1], result); @@ -395,7 +314,7 @@ Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; return ctx.OpBitcast(ctx.U32[1], result); @@ -403,7 +322,7 @@ Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; return ctx.OpPackHalf2x16(ctx.U32[1], result); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 088bd3059..a8f2ea5a0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -22,29 +22,29 @@ Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } -Id EmitLoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - u32 num_components) { - // TODO: Support reinterpreting bindings, guaranteed to be aligned +Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; - std::array components; - for (u32 element = 0; element < num_components; ++element) { - Id index{base_index}; - if (element > 0) { - index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); - } - const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; - components[element] = ctx.OpLoad(ctx.U32[1], pointer); - } - if (num_components == 1) { - return components[0]; - } else { - const std::span components_span(components.data(), num_components); - return ctx.OpCompositeConstruct(ctx.U32[num_components], components_span); - } + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); +} + +Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr) { + const Id pointer{StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr)}; + return ctx.OpLoad(result_type, pointer); +} + +void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr) { + const Id pointer{StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr)}; + ctx.OpStore(pointer, value); } } // Anonymous namespace @@ -104,92 +104,85 @@ void EmitWriteGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitLoadStorageU8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8)); } -void EmitLoadStorageS8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8)); } -void EmitLoadStorageU16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16)); } -void EmitLoadStorageS16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16)); } Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return EmitLoadStorage(ctx, binding, offset, 1); + return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32); } Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return EmitLoadStorage(ctx, binding, offset, 2); + return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2, sizeof(u32[2]), + &StorageDefinitions::U32x2); } Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return EmitLoadStorage(ctx, binding, offset, 4); + return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4, sizeof(u32[4]), + &StorageDefinitions::U32x4); } -void EmitWriteStorageU8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8); } -void EmitWriteStorageS8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8); } -void EmitWriteStorageU16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16); } -void EmitWriteStorageS16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16); } void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Dynamic storage buffer indexing"); - } - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id index{StorageIndex(ctx, offset, sizeof(u32))}; - const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; - ctx.OpStore(pointer, value); + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Dynamic storage buffer indexing"); - } - // TODO: Support reinterpreting bindings, guaranteed to be aligned - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id low_index{StorageIndex(ctx, offset, sizeof(u32))}; - const Id high_index{ctx.OpIAdd(ctx.U32[1], low_index, ctx.Constant(ctx.U32[1], 1U))}; - const Id low_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, low_index)}; - const Id high_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, high_index)}; - ctx.OpStore(low_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); - ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]), + &StorageDefinitions::U32x2); } void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Dynamic storage buffer indexing"); - } - // TODO: Support reinterpreting bindings, guaranteed to be aligned - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; - for (u32 element = 0; element < 4; ++element) { - Id index = base_index; - if (element > 0) { - index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); - } - const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; - ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, element)); - } + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]), + &StorageDefinitions::U32x4); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index ab529e86d..116d93c1c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -315,6 +315,23 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertF32U64: case IR::Opcode::ConvertF64U64: case IR::Opcode::SharedAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::StorageAtomicIAdd64: + case IR::Opcode::StorageAtomicSMin64: + case IR::Opcode::StorageAtomicUMin64: + case IR::Opcode::StorageAtomicSMax64: + case IR::Opcode::StorageAtomicUMax64: + case IR::Opcode::StorageAtomicAnd64: + case IR::Opcode::StorageAtomicOr64: + case IR::Opcode::StorageAtomicXor64: info.uses_int64 = true; break; default: @@ -457,46 +474,91 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FSwizzleAdd: info.uses_fswzadd = true; break; + case IR::Opcode::LoadStorageU8: + case IR::Opcode::LoadStorageS8: + case IR::Opcode::WriteStorageU8: + case IR::Opcode::WriteStorageS8: + info.used_storage_buffer_types |= IR::Type::U8; + break; + case IR::Opcode::LoadStorageU16: + case IR::Opcode::LoadStorageS16: + case IR::Opcode::WriteStorageU16: + case IR::Opcode::WriteStorageS16: + info.used_storage_buffer_types |= IR::Type::U16; + break; + case IR::Opcode::LoadStorage32: + case IR::Opcode::WriteStorage32: + case IR::Opcode::StorageAtomicIAdd32: + case IR::Opcode::StorageAtomicSMin32: + case IR::Opcode::StorageAtomicUMin32: + case IR::Opcode::StorageAtomicSMax32: + case IR::Opcode::StorageAtomicUMax32: + case IR::Opcode::StorageAtomicAnd32: + case IR::Opcode::StorageAtomicOr32: + case IR::Opcode::StorageAtomicXor32: + case IR::Opcode::StorageAtomicExchange32: + info.used_storage_buffer_types |= IR::Type::U32; + break; + case IR::Opcode::LoadStorage64: + case IR::Opcode::WriteStorage64: + info.used_storage_buffer_types |= IR::Type::U32x2; + break; + case IR::Opcode::LoadStorage128: + case IR::Opcode::WriteStorage128: + info.used_storage_buffer_types |= IR::Type::U32x4; + break; case IR::Opcode::SharedAtomicInc32: info.uses_shared_increment = true; break; case IR::Opcode::SharedAtomicDec32: info.uses_shared_decrement = true; break; + case IR::Opcode::SharedAtomicExchange64: + info.uses_int64_bit_atomics = true; + break; case IR::Opcode::GlobalAtomicInc32: case IR::Opcode::StorageAtomicInc32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_global_increment = true; break; case IR::Opcode::GlobalAtomicDec32: case IR::Opcode::StorageAtomicDec32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_global_decrement = true; break; case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::StorageAtomicAddF32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32_add = true; break; case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::StorageAtomicAddF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_add = true; break; case IR::Opcode::GlobalAtomicAddF32x2: case IR::Opcode::StorageAtomicAddF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_add = true; break; case IR::Opcode::GlobalAtomicMinF16x2: case IR::Opcode::StorageAtomicMinF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_min = true; break; case IR::Opcode::GlobalAtomicMinF32x2: case IR::Opcode::StorageAtomicMinF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_min = true; break; case IR::Opcode::GlobalAtomicMaxF16x2: case IR::Opcode::StorageAtomicMaxF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_max = true; break; case IR::Opcode::GlobalAtomicMaxF32x2: case IR::Opcode::StorageAtomicMaxF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_max = true; break; case IR::Opcode::GlobalAtomicIAdd64: @@ -516,11 +578,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::StorageAtomicAnd64: case IR::Opcode::StorageAtomicOr64: case IR::Opcode::StorageAtomicXor64: - info.uses_64_bit_atomics = true; - break; - case IR::Opcode::SharedAtomicExchange64: - info.uses_64_bit_atomics = true; - info.uses_shared_memory_u32x2 = true; + info.used_storage_buffer_types |= IR::Type::U64; + info.uses_int64_bit_atomics = true; break; default: break; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 6a51aabb5..15cf09c3d 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -141,10 +141,10 @@ struct Info { bool uses_atomic_f32x2_add{}; bool uses_atomic_f32x2_min{}; bool uses_atomic_f32x2_max{}; - bool uses_64_bit_atomics{}; - bool uses_shared_memory_u32x2{}; + bool uses_int64_bit_atomics{}; IR::Type used_constant_buffer_types{}; + IR::Type used_storage_buffer_types{}; u32 constant_buffer_mask{}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 911dfed44..87cfe6312 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -44,6 +44,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, + VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -313,6 +314,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, host_query_reset); + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR, + .pNext = nullptr, + .variablePointersStorageBuffer = VK_TRUE, + .variablePointers = VK_TRUE, + }; + SetNext(next, variable_pointers); + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, .pNext = nullptr, @@ -399,6 +408,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; + if (ext_shader_atomic_int64) { + atomic_int64 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR, + .pNext = nullptr, + .shaderBufferInt64Atomics = VK_TRUE, + .shaderSharedInt64Atomics = VK_TRUE, + }; + SetNext(next, atomic_int64); + } + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; if (khr_workgroup_memory_explicit_layout) { workgroup_layout = { @@ -624,9 +644,13 @@ void Device::CheckSuitability(bool requires_swapchain) const { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; demote.pNext = nullptr; + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{}; + variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR; + variable_pointers.pNext = &demote; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - robustness2.pNext = &demote; + robustness2.pNext = &variable_pointers; VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -654,6 +678,9 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), + std::make_pair(variable_pointers.variablePointers, "variablePointers"), + std::make_pair(variable_pointers.variablePointersStorageBuffer, + "variablePointersStorageBuffer"), std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), -- cgit v1.2.3 From 183855e396cc6918d36fbf3e38ea426e934b4e3e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 15 Apr 2021 22:46:11 -0300 Subject: shader: Implement tessellation shaders, polygon mode and invocation id --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 147 ++++++++++++++------ src/shader_recompiler/backend/spirv/emit_context.h | 10 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 39 ++++++ src/shader_recompiler/backend/spirv/emit_spirv.h | 3 + .../backend/spirv/emit_spirv_context_get_set.cpp | 88 ++++++++++-- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 12 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + src/shader_recompiler/frontend/ir/patch.cpp | 28 ++++ src/shader_recompiler/frontend/ir/patch.h | 149 +++++++++++++++++++++ src/shader_recompiler/frontend/ir/type.h | 41 +++--- src/shader_recompiler/frontend/ir/value.cpp | 9 ++ src/shader_recompiler/frontend/ir/value.h | 4 + src/shader_recompiler/frontend/maxwell/program.cpp | 5 + .../translate/impl/load_store_attribute.cpp | 33 +++-- .../translate/impl/move_special_register.cpp | 2 + .../ir_opt/collect_shader_info_pass.cpp | 41 ++++++ src/shader_recompiler/profile.h | 16 +++ src/shader_recompiler/shader_info.h | 5 + src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 13 ++ src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 30 +++++ .../renderer_vulkan/vk_staging_buffer_pool.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 3 +- 28 files changed, 605 insertions(+), 91 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/patch.cpp create mode 100644 src/shader_recompiler/frontend/ir/patch.h (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index bbbfa98a3..7c11d15bf 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -41,6 +41,8 @@ add_library(shader_recompiler STATIC frontend/ir/opcodes.cpp frontend/ir/opcodes.h frontend/ir/opcodes.inc + frontend/ir/patch.cpp + frontend/ir/patch.h frontend/ir/post_order.cpp frontend/ir/post_order.h frontend/ir/pred.h diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 032cf5e03..067f61613 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -125,19 +125,36 @@ u32 NumVertices(InputTopology input_topology) { throw InvalidArgument("Invalid input topology {}", input_topology); } -Id DefineInput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { - if (ctx.stage == Stage::Geometry) { - const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; - type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); +Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, + std::optional builtin = std::nullopt) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + if (per_invocation) { + type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], 32u)); + } + break; + case Stage::Geometry: + if (per_invocation) { + const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; + type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); + } + break; + default: + break; } return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); } -Id DefineOutput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { +Id DefineOutput(EmitContext& ctx, Id type, std::optional invocations, + std::optional builtin = std::nullopt) { + if (invocations && ctx.stage == Stage::TessellationControl) { + type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], *invocations)); + } return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); } -void DefineGenericOutput(EmitContext& ctx, size_t index) { +void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional invocations) { static constexpr std::string_view swizzle{"xyzw"}; const size_t base_attr_index{static_cast(IR::Attribute::Generic0X) + index * 4}; u32 element{0}; @@ -150,7 +167,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index) { } const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; - const Id id{DefineOutput(ctx, ctx.F32[num_components])}; + const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)}; ctx.Decorate(id, spv::Decoration::Location, static_cast(index)); if (element > 0) { ctx.Decorate(id, spv::Decoration::Component, element); @@ -161,10 +178,10 @@ void DefineGenericOutput(EmitContext& ctx, size_t index) { ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset); } if (num_components < 4 || element > 0) { - ctx.Name(id, fmt::format("out_attr{}", index)); - } else { const std::string_view subswizzle{swizzle.substr(element, num_components)}; ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle)); + } else { + ctx.Name(id, fmt::format("out_attr{}", index)); } const GenericElementInfo info{ .id = id, @@ -383,7 +400,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); - DefineInterfaces(program.info); + DefineInterfaces(program); DefineLocalMemory(program); DefineSharedMemory(program); DefineSharedMemoryFunctions(program); @@ -472,9 +489,9 @@ void EmitContext::DefineCommonConstants() { f32_zero_value = Constant(F32[1], 0.0f); } -void EmitContext::DefineInterfaces(const Info& info) { - DefineInputs(info); - DefineOutputs(info); +void EmitContext::DefineInterfaces(const IR::Program& program) { + DefineInputs(program.info); + DefineOutputs(program); } void EmitContext::DefineLocalMemory(const IR::Program& program) { @@ -972,26 +989,29 @@ void EmitContext::DefineLabels(IR::Program& program) { void EmitContext::DefineInputs(const Info& info) { if (info.uses_workgroup_id) { - workgroup_id = DefineInput(*this, U32[3], spv::BuiltIn::WorkgroupId); + workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); } if (info.uses_local_invocation_id) { - local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); + local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId); + } + if (info.uses_invocation_id) { + invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId); } if (info.uses_is_helper_invocation) { - is_helper_invocation = DefineInput(*this, U1, spv::BuiltIn::HelperInvocation); + is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); } if (info.uses_subgroup_mask) { - subgroup_mask_eq = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupEqMaskKHR); - subgroup_mask_lt = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupLtMaskKHR); - subgroup_mask_le = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupLeMaskKHR); - subgroup_mask_gt = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupGtMaskKHR); - subgroup_mask_ge = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupGeMaskKHR); + subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR); + subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR); + subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR); + subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); + subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); } if (info.uses_subgroup_invocation_id || (profile.warp_size_potentially_larger_than_guest && (info.uses_subgroup_vote || info.uses_subgroup_mask))) { subgroup_local_invocation_id = - DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); + DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId); } if (info.uses_fswzadd) { const Id f32_one{Constant(F32[1], 1.0f)}; @@ -1004,29 +1024,32 @@ void EmitContext::DefineInputs(const Info& info) { if (info.loads_position) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; - input_position = DefineInput(*this, F32[4], built_in); + input_position = DefineInput(*this, F32[4], true, built_in); } if (info.loads_instance_id) { if (profile.support_vertex_instance_id) { - instance_id = DefineInput(*this, U32[1], spv::BuiltIn::InstanceId); + instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); } else { - instance_index = DefineInput(*this, U32[1], spv::BuiltIn::InstanceIndex); - base_instance = DefineInput(*this, U32[1], spv::BuiltIn::BaseInstance); + instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); + base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } } if (info.loads_vertex_id) { if (profile.support_vertex_instance_id) { - vertex_id = DefineInput(*this, U32[1], spv::BuiltIn::VertexId); + vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); } else { - vertex_index = DefineInput(*this, U32[1], spv::BuiltIn::VertexIndex); - base_vertex = DefineInput(*this, U32[1], spv::BuiltIn::BaseVertex); + vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); + base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } } if (info.loads_front_face) { - front_face = DefineInput(*this, U1, spv::BuiltIn::FrontFacing); + front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); } if (info.loads_point_coord) { - point_coord = DefineInput(*this, F32[2], spv::BuiltIn::PointCoord); + point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); + } + if (info.loads_tess_coord) { + tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } for (size_t index = 0; index < info.input_generics.size(); ++index) { const InputVarying generic{info.input_generics[index]}; @@ -1038,7 +1061,7 @@ void EmitContext::DefineInputs(const Info& info) { continue; } const Id type{GetAttributeType(*this, input_type)}; - const Id id{DefineInput(*this, type)}; + const Id id{DefineInput(*this, type, true)}; Decorate(id, spv::Decoration::Location, static_cast(index)); Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; @@ -1059,58 +1082,98 @@ void EmitContext::DefineInputs(const Info& info) { break; } } + if (stage == Stage::TessellationEval) { + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineInput(*this, F32[4], false)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast(index)); + patches[index] = id; + } + } } -void EmitContext::DefineOutputs(const Info& info) { +void EmitContext::DefineOutputs(const IR::Program& program) { + const Info& info{program.info}; + const std::optional invocations{program.invocations}; if (info.stores_position || stage == Stage::VertexB) { - output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); + output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } if (info.stores_point_size || profile.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } - output_point_size = DefineOutput(*this, F32[1], spv::BuiltIn::PointSize); + output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); } if (info.stores_clip_distance) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; - clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); + clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } if (info.stores_layer && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing Layer in fragment stage"); } - layer = DefineOutput(*this, U32[1], spv::BuiltIn::Layer); + layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); } if (info.stores_viewport_index && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ViewportIndex in fragment stage"); } - viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex); + viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } for (size_t index = 0; index < info.stores_generics.size(); ++index) { if (info.stores_generics[index]) { - DefineGenericOutput(*this, index); + DefineGenericOutput(*this, index, invocations); } } - if (stage == Stage::Fragment) { + switch (stage) { + case Stage::TessellationControl: + if (info.stores_tess_level_outer) { + const Id type{TypeArray(F32[1], Constant(U32[1], 4))}; + output_tess_level_outer = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter); + Decorate(output_tess_level_outer, spv::Decoration::Patch); + } + if (info.stores_tess_level_inner) { + const Id type{TypeArray(F32[1], Constant(U32[1], 2))}; + output_tess_level_inner = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner); + Decorate(output_tess_level_inner, spv::Decoration::Patch); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineOutput(*this, F32[4], std::nullopt)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast(index)); + patches[index] = id; + } + break; + case Stage::Fragment: for (u32 index = 0; index < 8; ++index) { if (!info.stores_frag_color[index]) { continue; } - frag_color[index] = DefineOutput(*this, F32[4]); + frag_color[index] = DefineOutput(*this, F32[4], std::nullopt); Decorate(frag_color[index], spv::Decoration::Location, index); Name(frag_color[index], fmt::format("frag_color{}", index)); } if (info.stores_frag_depth) { - frag_depth = DefineOutput(*this, F32[1]); + frag_depth = DefineOutput(*this, F32[1], std::nullopt); Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); Name(frag_depth, "frag_depth"); } + break; + default: + break; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 0da14d5f8..ba0a253b3 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -147,6 +147,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id invocation_id{}; Id is_helper_invocation{}; Id subgroup_local_invocation_id{}; Id subgroup_mask_eq{}; @@ -162,6 +163,7 @@ public: Id base_vertex{}; Id front_face{}; Id point_coord{}; + Id tess_coord{}; Id clip_distances{}; Id layer{}; Id viewport_index{}; @@ -204,6 +206,10 @@ public: Id output_position{}; std::array, 32> output_generics{}; + Id output_tess_level_outer{}; + Id output_tess_level_inner{}; + std::array patches{}; + std::array frag_color{}; Id frag_depth{}; @@ -212,7 +218,7 @@ public: private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); - void DefineInterfaces(const Info& info); + void DefineInterfaces(const IR::Program& program); void DefineLocalMemory(const IR::Program& program); void DefineSharedMemory(const IR::Program& program); void DefineSharedMemoryFunctions(const IR::Program& program); @@ -226,7 +232,7 @@ private: void DefineLabels(IR::Program& program); void DefineInputs(const Info& info); - void DefineOutputs(const Info& info); + void DefineOutputs(const IR::Program& program); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3bf4c6a9e..105602ccf 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -45,6 +45,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.Label(); } else if constexpr (std::is_same_v) { return arg.Attribute(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); } else if constexpr (std::is_same_v) { return arg.Reg(); } @@ -120,6 +122,30 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) { return main; } +spv::ExecutionMode ExecutionMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Isolines: + return spv::ExecutionMode::Isolines; + case TessPrimitive::Triangles: + return spv::ExecutionMode::Triangles; + case TessPrimitive::Quads: + return spv::ExecutionMode::Quads; + } + throw InvalidArgument("Tessellation primitive {}", primitive); +} + +spv::ExecutionMode ExecutionMode(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return spv::ExecutionMode::SpacingEqual; + case TessSpacing::FractionalOdd: + return spv::ExecutionMode::SpacingFractionalOdd; + case TessSpacing::FractionalEven: + return spv::ExecutionMode::SpacingFractionalEven; + } + throw InvalidArgument("Tessellation spacing {}", spacing); +} + void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; @@ -134,6 +160,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { case Stage::VertexB: execution_model = spv::ExecutionModel::Vertex; break; + case Stage::TessellationControl: + execution_model = spv::ExecutionModel::TessellationControl; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations); + break; + case Stage::TessellationEval: + execution_model = spv::ExecutionModel::TessellationEvaluation; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_primitive)); + ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_spacing)); + ctx.AddExecutionMode(main, ctx.profile.tess_clockwise ? spv::ExecutionMode::VertexOrderCw + : spv::ExecutionMode::VertexOrderCcw); + break; case Stage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddCapability(spv::Capability::Geometry); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 55b2edba0..8caf30f1b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -55,6 +55,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitGetZFlag(EmitContext& ctx); @@ -67,6 +69,7 @@ void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitInvocationId(EmitContext& ctx); Id EmitIsHelperInvocation(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 59c56c5ba..4a1aeece5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -32,13 +32,26 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { template Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { - if (ctx.stage == Stage::Geometry) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: return ctx.OpAccessChain(pointer_type, base, vertex, std::forward(args)...); - } else { + default: return ctx.OpAccessChain(pointer_type, base, std::forward(args)...); } } +template +Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { + if (ctx.stage == Stage::TessellationControl) { + const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)}; + return ctx.OpAccessChain(result_type, base, invocation_id, std::forward(args)...); + } else { + return ctx.OpAccessChain(result_type, base, std::forward(args)...); + } +} + std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; @@ -49,7 +62,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { } else { const u32 index_element{element - info.first_element}; const Id index_id{ctx.Constant(ctx.U32[1], index_element)}; - return ctx.OpAccessChain(ctx.output_f32, info.id, index_id); + return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); } } switch (attr) { @@ -61,7 +74,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionW: { const u32 element{static_cast(attr) % 4}; const Id element_id{ctx.Constant(ctx.U32[1], element)}; - return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id); + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: @@ -74,7 +87,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const u32 base{static_cast(IR::Attribute::ClipDistance0)}; const u32 index{static_cast(attr) - base}; const Id clip_num{ctx.Constant(ctx.U32[1], index)}; - return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num); + return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); } case IR::Attribute::Layer: return ctx.profile.support_viewport_index_layer_non_geometry || @@ -222,11 +235,18 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { ctx.Constant(ctx.U32[1], std::numeric_limits::max()), ctx.u32_zero_value); case IR::Attribute::PointSpriteS: - return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, - ctx.u32_zero_value)); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); case IR::Attribute::PointSpriteT: - return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, - ctx.Constant(ctx.U32[1], 1U))); + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, + ctx.Constant(ctx.U32[1], 1U))); + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, + ctx.Constant(ctx.U32[1], 1U))); + default: throw NotImplementedException("Read attribute {}", attr); } @@ -240,9 +260,12 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_un } Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { - if (ctx.stage == Stage::Geometry) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); - } else { + default: return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); } } @@ -251,6 +274,45 @@ void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unus ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value); } +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))}; + const Id pointer{ctx.OpAccessChain(ctx.input_f32, ctx.patches.at(index), element)}; + return ctx.OpLoad(ctx.F32[1], pointer); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { + const Id pointer{[&] { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))}; + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; + const Id index_id{ctx.Constant(ctx.U32[1], index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); + } + case IR::Patch::TessellationLodInteriorU: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.u32_zero_value); + case IR::Patch::TessellationLodInteriorV: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.Constant(ctx.U32[1], 1u)); + default: + throw NotImplementedException("Patch {}", patch); + } + }()}; + ctx.OpStore(pointer, value); +} + void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { const Id component_id{ctx.Constant(ctx.U32[1], component)}; const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; @@ -301,6 +363,10 @@ Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } +Id EmitInvocationId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); +} + Id EmitIsHelperInvocation(EmitContext& ctx) { return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index d66eb17a6..b821d9f47 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -331,6 +331,14 @@ void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, c Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); } +F32 IREmitter::GetPatch(Patch patch) { + return Inst(Opcode::GetPatch, patch); +} + +void IREmitter::SetPatch(Patch patch, const F32& value) { + Inst(Opcode::SetPatch, patch, value); +} + void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); } @@ -363,6 +371,10 @@ U32 IREmitter::LocalInvocationIdZ() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; } +U32 IREmitter::InvocationId() { + return Inst(Opcode::InvocationId); +} + U1 IREmitter::IsHelperInvocation() { return Inst(Opcode::IsHelperInvocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index e70359eb1..7f8f1ad42 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -84,6 +84,9 @@ public: [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); + [[nodiscard]] F32 GetPatch(Patch patch); + void SetPatch(Patch patch, const F32& value); + void SetFragColor(u32 index, u32 component, const F32& value); void SetFragDepth(const F32& value); @@ -95,6 +98,7 @@ public: [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); + [[nodiscard]] U32 InvocationId(); [[nodiscard]] U1 IsHelperInvocation(); [[nodiscard]] U32 LaneId(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 204c55fa8..b2d7573d9 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -73,6 +73,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::EndPrimitive: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: + case Opcode::SetPatch: case Opcode::SetFragColor: case Opcode::SetFragDepth: case Opcode::WriteGlobalU8: diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 7d3e0b2ab..7f04b647b 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -24,6 +24,7 @@ constexpr Type Label{Type::Label}; constexpr Type Reg{Type::Reg}; constexpr Type Pred{Type::Pred}; constexpr Type Attribute{Type::Attribute}; +constexpr Type Patch{Type::Patch}; constexpr Type U1{Type::U1}; constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 7a21fe746..a86542cd8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -48,6 +48,8 @@ OPCODE(GetAttribute, F32, Attr OPCODE(SetAttribute, Void, Attribute, F32, U32, ) OPCODE(GetAttributeIndexed, F32, U32, U32, ) OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) +OPCODE(GetPatch, F32, Patch, ) +OPCODE(SetPatch, Void, Patch, F32, ) OPCODE(SetFragColor, Void, U32, U32, F32, ) OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) @@ -60,6 +62,7 @@ OPCODE(SetCFlag, Void, U1, OPCODE(SetOFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) +OPCODE(InvocationId, U32, ) OPCODE(IsHelperInvocation, U1, ) // Undefined diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp new file mode 100644 index 000000000..1f770bc48 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/ir/patch.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +bool IsGeneric(Patch patch) noexcept { + return patch >= Patch::Component0 && patch <= Patch::Component119; +} + +u32 GenericPatchIndex(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast(patch) - static_cast(Patch::Component0)) / 4; +} + +u32 GenericPatchElement(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast(patch) - static_cast(Patch::Component0)) % 4; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h new file mode 100644 index 000000000..6d66ff0d6 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.h @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader::IR { + +enum class Patch : u64 { + TessellationLodLeft, + TessellationLodTop, + TessellationLodRight, + TessellationLodBottom, + TessellationLodInteriorU, + TessellationLodInteriorV, + ComponentPadding0, + ComponentPadding1, + Component0, + Component1, + Component2, + Component3, + Component4, + Component5, + Component6, + Component7, + Component8, + Component9, + Component10, + Component11, + Component12, + Component13, + Component14, + Component15, + Component16, + Component17, + Component18, + Component19, + Component20, + Component21, + Component22, + Component23, + Component24, + Component25, + Component26, + Component27, + Component28, + Component29, + Component30, + Component31, + Component32, + Component33, + Component34, + Component35, + Component36, + Component37, + Component38, + Component39, + Component40, + Component41, + Component42, + Component43, + Component44, + Component45, + Component46, + Component47, + Component48, + Component49, + Component50, + Component51, + Component52, + Component53, + Component54, + Component55, + Component56, + Component57, + Component58, + Component59, + Component60, + Component61, + Component62, + Component63, + Component64, + Component65, + Component66, + Component67, + Component68, + Component69, + Component70, + Component71, + Component72, + Component73, + Component74, + Component75, + Component76, + Component77, + Component78, + Component79, + Component80, + Component81, + Component82, + Component83, + Component84, + Component85, + Component86, + Component87, + Component88, + Component89, + Component90, + Component91, + Component92, + Component93, + Component94, + Component95, + Component96, + Component97, + Component98, + Component99, + Component100, + Component101, + Component102, + Component103, + Component104, + Component105, + Component106, + Component107, + Component108, + Component109, + Component110, + Component111, + Component112, + Component113, + Component114, + Component115, + Component116, + Component117, + Component118, + Component119, +}; +static_assert(static_cast(Patch::Component119) == 127); + +[[nodiscard]] bool IsGeneric(Patch patch) noexcept; + +[[nodiscard]] u32 GenericPatchIndex(Patch patch); + +[[nodiscard]] u32 GenericPatchElement(Patch patch); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 9a32ca1e8..8b3b33852 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -20,26 +20,27 @@ enum class Type { Reg = 1 << 2, Pred = 1 << 3, Attribute = 1 << 4, - U1 = 1 << 5, - U8 = 1 << 6, - U16 = 1 << 7, - U32 = 1 << 8, - U64 = 1 << 9, - F16 = 1 << 10, - F32 = 1 << 11, - F64 = 1 << 12, - U32x2 = 1 << 13, - U32x3 = 1 << 14, - U32x4 = 1 << 15, - F16x2 = 1 << 16, - F16x3 = 1 << 17, - F16x4 = 1 << 18, - F32x2 = 1 << 19, - F32x3 = 1 << 20, - F32x4 = 1 << 21, - F64x2 = 1 << 22, - F64x3 = 1 << 23, - F64x4 = 1 << 24, + Patch = 1 << 5, + U1 = 1 << 6, + U8 = 1 << 7, + U16 = 1 << 8, + U32 = 1 << 9, + U64 = 1 << 10, + F16 = 1 << 11, + F32 = 1 << 12, + F64 = 1 << 13, + U32x2 = 1 << 14, + U32x3 = 1 << 15, + U32x4 = 1 << 16, + F16x2 = 1 << 17, + F16x3 = 1 << 18, + F16x4 = 1 << 19, + F32x2 = 1 << 20, + F32x3 = 1 << 21, + F32x4 = 1 << 22, + F64x2 = 1 << 23, + F64x3 = 1 << 24, + F64x4 = 1 << 25, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 1e7ffb86d..bf5f8c0c2 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -18,6 +18,8 @@ Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} +Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {} + Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} @@ -109,6 +111,11 @@ IR::Attribute Value::Attribute() const { return attribute; } +IR::Patch Value::Patch() const { + ValidateAccess(Type::Patch); + return patch; +} + bool Value::U1() const { if (IsIdentity()) { return inst->Arg(0).U1(); @@ -182,6 +189,8 @@ bool Value::operator==(const Value& other) const { return pred == other.pred; case Type::Attribute: return attribute == other.attribute; + case Type::Patch: + return patch == other.patch; case Type::U1: return imm_u1 == other.imm_u1; case Type::U8: diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index a0962863d..303745563 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -9,6 +9,7 @@ #include "shader_recompiler/frontend/ir/attribute.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/frontend/ir/type.h" namespace Shader::IR { @@ -24,6 +25,7 @@ public: explicit Value(IR::Reg value) noexcept; explicit Value(IR::Pred value) noexcept; explicit Value(IR::Attribute value) noexcept; + explicit Value(IR::Patch value) noexcept; explicit Value(bool value) noexcept; explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; @@ -46,6 +48,7 @@ public: [[nodiscard]] IR::Reg Reg() const; [[nodiscard]] IR::Pred Pred() const; [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] IR::Patch Patch() const; [[nodiscard]] bool U1() const; [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; @@ -67,6 +70,7 @@ private: IR::Reg reg; IR::Pred pred; IR::Attribute attribute; + IR::Patch patch; bool imm_u1; u8 imm_u8; u16 imm_u16; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ab67446c8..20a1d61cc 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -70,6 +70,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool size; } const ald{insn}; - if (ald.o != 0) { - throw NotImplementedException("O"); - } - if (ald.patch != 0) { - throw NotImplementedException("P"); - } const u64 offset{ald.absolute_offset.Value()}; if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); @@ -84,11 +78,19 @@ void TranslatorVisitor::ALD(u64 insn) { const u32 num_elements{NumElements(ald.size)}; if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::Attribute attr{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + if (ald.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetPatch(patch)); + } else { + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + } } return; } + if (ald.patch != 0) { + throw NotImplementedException("Indirect patch read"); + } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); }); @@ -106,9 +108,6 @@ void TranslatorVisitor::AST(u64 insn) { BitField<47, 2, Size> size; } const ast{insn}; - if (ast.patch != 0) { - throw NotImplementedException("P"); - } if (ast.index_reg != IR::Reg::RZ) { throw NotImplementedException("Indexed store"); } @@ -120,11 +119,19 @@ void TranslatorVisitor::AST(u64 insn) { const u32 num_elements{NumElements(ast.size)}; if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::Attribute attr{offset / 4 + element}; - ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + if (ast.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + ir.SetPatch(patch, F(ast.src_reg + element)); + } else { + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + } } return; } + if (ast.patch != 0) { + throw NotImplementedException("Indexed tessellation patch store"); + } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); }); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index bc822d585..660b84c20 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -113,6 +113,8 @@ enum class SpecialRegister : u64 { [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { switch (special_register) { + case SpecialRegister::SR_INVOCATION_ID: + return ir.InvocationId(); case SpecialRegister::SR_THREAD_KILL: return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; case SpecialRegister::SR_INVOCATION_INFO: diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 617ec05ce..aadcf7999 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -53,6 +53,10 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PointSpriteT: info.loads_point_coord = true; break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + info.loads_tess_coord = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } @@ -94,6 +98,34 @@ void SetAttribute(Info& info, IR::Attribute attribute) { } } +void GetPatch(Info& info, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Reading non-generic patch {}", patch); + } + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; +} + +void SetPatch(Info& info, IR::Patch patch) { + if (IR::IsGeneric(patch)) { + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodBottom: + info.stores_tess_level_outer = true; + break; + case IR::Patch::TessellationLodInteriorU: + case IR::Patch::TessellationLodInteriorV: + info.stores_tess_level_inner = true; + break; + default: + throw NotImplementedException("Set patch {}", patch); + } +} + void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: @@ -350,6 +382,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SetAttribute: SetAttribute(info, inst.Arg(0).Attribute()); break; + case IR::Opcode::GetPatch: + GetPatch(info, inst.Arg(0).Patch()); + break; + case IR::Opcode::SetPatch: + SetPatch(info, inst.Arg(0).Patch()); + break; case IR::Opcode::GetAttributeIndexed: info.loads_indexed_attributes = true; break; @@ -368,6 +406,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::InvocationId: + info.uses_invocation_id = true; + break; case IR::Opcode::IsHelperInvocation: info.uses_is_helper_invocation = true; break; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c26017d75..3a04f075e 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -38,6 +38,18 @@ enum class CompareFunction { Always, }; +enum class TessPrimitive { + Isolines, + Triangles, + Quads, +}; + +enum class TessSpacing { + Equal, + FractionalOdd, + FractionalEven, +}; + struct TransformFeedbackVarying { u32 buffer{}; u32 stride{}; @@ -74,6 +86,10 @@ struct Profile { bool convert_depth_mode{}; bool force_early_z{}; + TessPrimitive tess_primitive{}; + TessSpacing tess_spacing{}; + bool tess_clockwise{}; + InputTopology input_topology{}; std::optional fixed_state_point_size; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 336c6131a..4dbf9ed12 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -101,8 +101,10 @@ struct Info { bool uses_workgroup_id{}; bool uses_local_invocation_id{}; + bool uses_invocation_id{}; bool uses_is_helper_invocation{}; bool uses_subgroup_invocation_id{}; + std::array uses_patches{}; std::array input_generics{}; bool loads_position{}; @@ -110,6 +112,7 @@ struct Info { bool loads_vertex_id{}; bool loads_front_face{}; bool loads_point_coord{}; + bool loads_tess_coord{}; bool loads_indexed_attributes{}; std::array stores_frag_color{}; @@ -120,6 +123,8 @@ struct Info { bool stores_clip_distance{}; bool stores_layer{}; bool stores_viewport_index{}; + bool stores_tess_level_outer{}; + bool stores_tess_level_inner{}; bool stores_indexed_attributes{}; bool uses_fp16{}; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index dc4ff0da2..8f0b0b8ec 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -685,6 +685,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) { return {}; } +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) { + switch (polygon_mode) { + case Maxwell::PolygonMode::Point: + return VK_POLYGON_MODE_POINT; + case Maxwell::PolygonMode::Line: + return VK_POLYGON_MODE_LINE; + case Maxwell::PolygonMode::Fill: + return VK_POLYGON_MODE_FILL; + } + UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode); + return {}; +} + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { switch (swizzle) { case Tegra::Texture::SwizzleSource::Zero: diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 9f78e15b6..50a599c11 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -65,6 +65,8 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face); VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode); + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 84720a6f9..d5e9dae0f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -355,7 +355,8 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, + .polygonMode = + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)), .cullMode = static_cast( dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ee22255bf..0bccc640a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1040,6 +1040,36 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); break; + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... + profile.tess_clockwise = key.state.tessellation_clockwise == 0; + profile.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + profile.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; case Shader::Stage::Geometry: if (program.output_topology == Shader::OutputTopology::PointList) { profile.fixed_state_point_size = point_size; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 0412b5234..555b12ed7 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .flags = 0, .size = STREAM_BUFFER_SIZE, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 87cfe6312..f0de19ba1 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -225,7 +225,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .drawIndirectFirstInstance = false, .depthClamp = true, .depthBiasClamp = true, - .fillModeNonSolid = false, + .fillModeNonSolid = true, .depthBounds = false, .wideLines = false, .largePoints = true, @@ -670,6 +670,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.largePoints, "largePoints"), std::make_pair(features.multiViewport, "multiViewport"), std::make_pair(features.depthBiasClamp, "depthBiasClamp"), + std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), -- cgit v1.2.3 From e3514bcd6b09f623da14c4f3c4ffd988e75577ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 16:31:15 -0300 Subject: spirv: Implement ViewportMask with NV_viewport_array2 --- src/shader_recompiler/backend/spirv/emit_context.cpp | 4 ++++ src/shader_recompiler/backend/spirv/emit_context.h | 2 ++ src/shader_recompiler/backend/spirv/emit_spirv.cpp | 4 ++++ src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 5 +++++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 5 +++++ src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 10 files changed, 32 insertions(+) (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3946dab14..2f8678b4e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -457,6 +457,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); + output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); if (info.uses_int8) { AddCapability(spv::Capability::Int8); @@ -1131,6 +1132,9 @@ void EmitContext::DefineOutputs(const IR::Program& program) { } viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } + if (info.stores_viewport_mask && profile.support_viewport_mask) { + viewport_mask = DefineOutput(*this, TypeArray(U32[1], Constant(U32[1], 1u)), std::nullopt); + } for (size_t index = 0; index < info.stores_generics.size(); ++index) { if (info.stores_generics[index]) { DefineGenericOutput(*this, index, invocations); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index c7d6f8a38..c41cad098 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -134,6 +134,7 @@ public: Id input_s32{}; Id output_f32{}; + Id output_u32{}; Id image_buffer_type{}; Id sampled_texture_buffer_type{}; @@ -167,6 +168,7 @@ public: Id clip_distances{}; Id layer{}; Id viewport_index{}; + Id viewport_mask{}; Id primitive_id{}; Id fswzadd_lut_a{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 105602ccf..90c4833a8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -303,6 +303,10 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.stores_viewport_index) { ctx.AddCapability(spv::Capability::MultiViewport); } + if (info.stores_viewport_mask && profile.support_viewport_mask) { + ctx.AddExtension("SPV_NV_viewport_array2"); + ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); + } if (info.stores_layer || info.stores_viewport_index) { if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f3de577f6..ca067f1c4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -99,6 +99,11 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { ctx.stage == Shader::Stage::Geometry ? std::optional{ctx.viewport_index} : std::nullopt; + case IR::Attribute::ViewportMask: + if (!ctx.profile.support_viewport_mask) { + return std::nullopt; + } + return ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value); default: throw NotImplementedException("Read attribute {}", attr); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index c84bf211f..9631a445e 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -96,6 +96,9 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ViewportIndex: info.stores_viewport_index = true; break; + case IR::Attribute::ViewportMask: + info.stores_viewport_mask = true; + break; default: throw NotImplementedException("Set attribute {}", attribute); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3a04f075e..a2c2948d5 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -75,6 +75,7 @@ struct Profile { bool support_explicit_workgroup_layout{}; bool support_vote{}; bool support_viewport_index_layer_non_geometry{}; + bool support_viewport_mask{}; bool support_typeless_image_loads{}; bool warp_size_potentially_larger_than_guest{}; bool support_int64_atomics{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d6cde1596..d33df8aad 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -124,6 +124,7 @@ struct Info { bool stores_clip_distance{}; bool stores_layer{}; bool stores_viewport_index{}; + bool stores_viewport_mask{}; bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; bool stores_indexed_attributes{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0bccc640a..4d0d3ebb7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -690,6 +690,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f0de19ba1..72b83f99a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -346,6 +346,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); } + if (!nv_viewport_array2) { + LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -724,6 +728,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { } }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); + test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4e6d13308..4415558bb 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -169,6 +169,11 @@ public: return nv_viewport_swizzle; } + /// Returns true if the device supports VK_NV_viewport_array2. + bool IsNvViewportArray2Supported() const { + return nv_viewport_array2; + } + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -312,6 +317,7 @@ private: bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. + bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. -- cgit v1.2.3 From 95815a3883d708f71db5119f42243e183f32f9a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 17:22:59 -0300 Subject: shader: Implement PIXLD.MY_INDEX --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 3 ++ src/shader_recompiler/backend/spirv/emit_context.h | 1 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 3 ++ src/shader_recompiler/backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 4 ++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../frontend/maxwell/translate/impl/pixel_load.cpp | 46 ++++++++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 3 ++ src/shader_recompiler/shader_info.h | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 3 +- 14 files changed, 71 insertions(+), 5 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 7c11d15bf..07963a760 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -137,6 +137,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/impl/not_implemented.cpp frontend/maxwell/translate/impl/output_geometry.cpp + frontend/maxwell/translate/impl/pixel_load.cpp frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2f8678b4e..0b4abeb44 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -998,6 +998,9 @@ void EmitContext::DefineInputs(const Info& info) { if (info.uses_invocation_id) { invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId); } + if (info.uses_sample_id) { + sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId); + } if (info.uses_is_helper_invocation) { is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index c41cad098..9d8340333 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -149,6 +149,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; Id invocation_id{}; + Id sample_id{}; Id is_helper_invocation{}; Id subgroup_local_invocation_id{}; Id subgroup_mask_eq{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 90c4833a8..9ec970706 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -335,6 +335,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_typeless_image_writes) { ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); } + if (info.uses_sample_id) { + ctx.AddCapability(spv::Capability::SampleRateShading); + } if (!ctx.profile.xfb_varyings.empty()) { ctx.AddCapability(spv::Capability::TransformFeedback); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 8caf30f1b..dfddf5e58 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -70,6 +70,7 @@ void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); Id EmitInvocationId(EmitContext& ctx); +Id EmitSampleId(EmitContext& ctx); Id EmitIsHelperInvocation(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index aaa20ab95..7555dd94c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -391,6 +391,10 @@ Id EmitInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); } +Id EmitSampleId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.sample_id); +} + Id EmitIsHelperInvocation(EmitContext& ctx) { return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b821d9f47..141efd86c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -375,6 +375,10 @@ U32 IREmitter::InvocationId() { return Inst(Opcode::InvocationId); } +U32 IREmitter::SampleId() { + return Inst(Opcode::SampleId); +} + U1 IREmitter::IsHelperInvocation() { return Inst(Opcode::IsHelperInvocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7f8f1ad42..81833d928 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -99,6 +99,7 @@ public: [[nodiscard]] U32 LocalInvocationIdZ(); [[nodiscard]] U32 InvocationId(); + [[nodiscard]] U32 SampleId(); [[nodiscard]] U1 IsHelperInvocation(); [[nodiscard]] U32 LaneId(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index a86542cd8..d5e443673 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -63,6 +63,7 @@ OPCODE(SetOFlag, Void, U1, OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) OPCODE(InvocationId, U32, ) +OPCODE(SampleId, U32, ) OPCODE(IsHelperInvocation, U1, ) // Undefined diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a45d1e4be..a4f99bbbe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,10 +181,6 @@ void TranslatorVisitor::PEXIT(u64) { ThrowNotImplemented(Opcode::PEXIT); } -void TranslatorVisitor::PIXLD(u64) { - ThrowNotImplemented(Opcode::PIXLD); -} - void TranslatorVisitor::PLONGJMP(u64) { ThrowNotImplemented(Opcode::PLONGJMP); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + CovMask, + Covered, + Offset, + CentroidOffset, + MyIndex, +}; +} // Anonymous namespace + +void TranslatorVisitor::PIXLD(u64 insn) { + union { + u64 raw; + BitField<31, 3, Mode> mode; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, s64> addr_offset; + BitField<45, 3, IR::Pred> dest_pred; + } const pixld{insn}; + + if (pixld.dest_pred != IR::Pred::PT) { + throw NotImplementedException("Destination predicate"); + } + if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { + throw NotImplementedException("Non-zero source register"); + } + switch (pixld.mode) { + case Mode::MyIndex: + X(pixld.dest_reg, ir.SampleId()); + break; + default: + throw NotImplementedException("Mode {}", pixld.mode.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 9631a445e..5d1310466 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -415,6 +415,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::InvocationId: info.uses_invocation_id = true; break; + case IR::Opcode::SampleId: + info.uses_sample_id = true; + break; case IR::Opcode::IsHelperInvocation: info.uses_is_helper_invocation = true; break; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d33df8aad..686f5c719 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -102,6 +102,7 @@ struct Info { bool uses_workgroup_id{}; bool uses_local_invocation_id{}; bool uses_invocation_id{}; + bool uses_sample_id{}; bool uses_is_helper_invocation{}; bool uses_subgroup_invocation_id{}; std::array uses_patches{}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 72b83f99a..038231298 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -218,7 +218,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .independentBlend = true, .geometryShader = true, .tessellationShader = true, - .sampleRateShading = false, + .sampleRateShading = true, .dualSrcBlend = false, .logicOp = false, .multiDrawIndirect = false, @@ -677,6 +677,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.sampleRateShading, "sampleRateShading"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), -- cgit v1.2.3 From 0ace34575cd099fb0db955ab32c215106ef19f84 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:19:14 -0300 Subject: shader: Require dual source blending --- src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 038231298..9c609e504 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -219,7 +219,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .geometryShader = true, .tessellationShader = true, .sampleRateShading = true, - .dualSrcBlend = false, + .dualSrcBlend = true, .logicOp = false, .multiDrawIndirect = false, .drawIndirectFirstInstance = false, @@ -678,6 +678,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.sampleRateShading, "sampleRateShading"), + std::make_pair(features.dualSrcBlend, "dualSrcBlend"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), -- cgit v1.2.3 From 2dc86372c76afb134651499452bb5074b6d1e839 Mon Sep 17 00:00:00 2001 From: Rodrigo Locatti Date: Fri, 23 Apr 2021 02:38:02 -0300 Subject: shader: Fix bugs and build issues on GCC --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 4 ++-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6e9f66262..6611c1de3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -95,7 +95,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); const u32 index_offset{index << desc.size_shift}; const u32 offset{desc.cbuf_offset + index_offset}; - const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { @@ -136,7 +136,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { + for (u32 i = 0; index < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { is_written = desc.is_written; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e43db280f..a8b402253 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -226,7 +226,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { + for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4415558bb..ebe073293 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -4,10 +4,10 @@ #pragma once +#include #include #include #include -#include #include #include "common/common_types.h" -- cgit v1.2.3 From 0c0ee9d8973abd7d1649df7a6b6f57b3a5570dfe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 07:39:00 -0300 Subject: vulkan_device: Require shaderClipDistance and shaderCullDistance features --- src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9c609e504..2318c1bda 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -249,8 +249,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderSampledImageArrayDynamicIndexing = false, .shaderStorageBufferArrayDynamicIndexing = false, .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = false, - .shaderCullDistance = false, + .shaderClipDistance = true, + .shaderCullDistance = true, .shaderFloat64 = true, .shaderInt64 = true, .shaderInt16 = true, @@ -684,6 +684,8 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), + std::make_pair(features.shaderClipDistance, "shaderClipDistance"), + std::make_pair(features.shaderCullDistance, "shaderCullDistance"), std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), std::make_pair(variable_pointers.variablePointers, "variablePointers"), std::make_pair(variable_pointers.variablePointersStorageBuffer, -- cgit v1.2.3 From d621e96d0de212cc16897eadf71e8a1b2e1eb5dc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:28:34 -0300 Subject: shader: Initial OpenGL implementation --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 + src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + .../translate/impl/move_special_register.cpp | 7 + src/video_core/CMakeLists.txt | 4 + src/video_core/buffer_cache/buffer_cache.h | 53 ++-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 37 ++- src/video_core/renderer_opengl/gl_buffer_cache.h | 40 ++- .../renderer_opengl/gl_compute_program.cpp | 178 +++++++++++++ .../renderer_opengl/gl_compute_program.h | 83 ++++++ src/video_core/renderer_opengl/gl_device.cpp | 89 ------- src/video_core/renderer_opengl/gl_device.h | 16 -- .../renderer_opengl/gl_graphics_program.cpp | 296 +++++++++++++++++++++ .../renderer_opengl/gl_graphics_program.h | 105 ++++++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 23 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 275 ++++++++++++++++++- src/video_core/renderer_opengl/gl_shader_cache.h | 98 ++++--- .../renderer_opengl/gl_shader_manager.cpp | 146 ---------- src/video_core/renderer_opengl/gl_shader_manager.h | 73 +---- .../renderer_opengl/gl_texture_cache.cpp | 257 ++++++------------ src/video_core/renderer_opengl/gl_texture_cache.h | 29 +- src/video_core/renderer_opengl/maxwell_to_gl.h | 108 ++++++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 17 +- src/video_core/renderer_opengl/renderer_opengl.h | 5 +- src/video_core/renderer_opengl/util_shaders.cpp | 13 +- src/video_core/renderer_vulkan/pipeline_helper.h | 17 -- src/video_core/renderer_vulkan/vk_buffer_cache.h | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 22 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 22 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 23 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 11 - src/video_core/shader_cache.cpp | 17 ++ src/video_core/shader_cache.h | 23 +- src/video_core/shader_environment.cpp | 4 +- src/video_core/shader_environment.h | 16 -- src/video_core/texture_cache/formatter.cpp | 4 +- src/video_core/texture_cache/formatter.h | 3 +- src/video_core/textures/texture.h | 9 + src/video_core/vulkan_common/vulkan_device.cpp | 2 +- 38 files changed, 1427 insertions(+), 705 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_compute_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_compute_program.h create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.h (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b3c9fe72a..5913fdeff 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() { return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; } +Value IREmitter::LocalInvocationId() { + return Inst(Opcode::LocalInvocationId); +} + U32 IREmitter::LocalInvocationIdX() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4441c495d..a12919283 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -95,6 +95,7 @@ public: [[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdZ(); + [[nodiscard]] Value LocalInvocationId(); [[nodiscard]] U32 LocalInvocationIdX(); [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index b0baff74b..01fb6f5e5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -120,6 +120,13 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_INVOCATION_INFO: // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); return ir.Imm32(0x00ff'0000); + case SpecialRegister::SR_TID: { + const IR::Value tid{ir.LocalInvocationId()}; + return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, + IR::U32{ir.CompositeExtract(tid, 1)}, + ir.Imm32(16), ir.Imm32(8)), + IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); + } case SpecialRegister::SR_TID_X: return ir.LocalInvocationIdX(); case SpecialRegister::SR_TID_Y: diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6e0e4b8f5..b008c37c0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -67,10 +67,14 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h + renderer_opengl/gl_compute_program.cpp + renderer_opengl/gl_compute_program.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h + renderer_opengl/gl_graphics_program.cpp + renderer_opengl/gl_graphics_program.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 29746f61d..6c92e4c30 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -70,8 +70,8 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; + static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -154,7 +154,7 @@ public: void UnbindGraphicsTextureBuffers(size_t stage); void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written); + PixelFormat format, bool is_written, bool is_image); void UnbindComputeStorageBuffers(); @@ -164,7 +164,7 @@ public: void UnbindComputeTextureBuffers(); void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, - bool is_written); + bool is_written, bool is_image); void FlushCachedWrites(); @@ -197,6 +197,7 @@ public: [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); std::mutex mutex; + Runtime& runtime; private: template @@ -366,7 +367,6 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - Runtime& runtime; SlotVector slot_buffers; DelayedDestructionRing delayed_destruction_ring; @@ -394,8 +394,10 @@ private: std::array enabled_texture_buffers{}; std::array written_texture_buffers{}; + std::array image_texture_buffers{}; u32 enabled_compute_texture_buffers = 0; u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; std::array fast_bound_uniform_buffers{}; @@ -431,8 +433,8 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_) - : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); deletion_iterator = slot_buffers.end(); @@ -703,13 +705,18 @@ template void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { enabled_texture_buffers[stage] = 0; written_texture_buffers[stage] = 0; + image_texture_buffers[stage] = 0; } template void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, - u32 size, PixelFormat format, bool is_written) { + u32 size, PixelFormat format, bool is_written, + bool is_image) { enabled_texture_buffers[stage] |= 1U << tbo_index; written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; + } texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -717,6 +724,7 @@ template void BufferCache

::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; written_compute_storage_buffers = 0; + image_compute_texture_buffers = 0; } template @@ -737,13 +745,17 @@ template void BufferCache

::UnbindComputeTextureBuffers() { enabled_compute_texture_buffers = 0; written_compute_texture_buffers = 0; + image_compute_texture_buffers = 0; } template void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written) { + PixelFormat format, bool is_written, bool is_image) { enabled_compute_texture_buffers |= 1U << tbo_index; written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + } compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -1057,7 +1069,6 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { template void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { - u32 binding_index = 0; ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { const TextureBufferBinding& binding = texture_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1066,9 +1077,12 @@ void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_texture_buffers[stage] >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1139,7 +1153,6 @@ void BufferCache

::BindHostComputeStorageBuffers() { template void BufferCache

::BindHostComputeTextureBuffers() { - u32 binding_index = 0; ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { const TextureBufferBinding& binding = compute_texture_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1148,9 +1161,12 @@ void BufferCache

::BindHostComputeTextureBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_compute_texture_buffers >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1339,11 +1355,10 @@ void BufferCache

::UpdateComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer Binding& binding = compute_storage_buffers[index]; - const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); - binding.buffer_id = buffer_id; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed if (((written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c4189fb60..2d0ef1307 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -2,14 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL { namespace { +using VideoCore::Surface::PixelFormat; + struct BindlessSSBO { GLuint64EXT address; GLsizei length; @@ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept { glMakeNamedBufferResidentNV(buffer.handle, access); } +GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return it->texture.handle; + } + OGLTexture texture; + texture.Create(GL_TEXTURE_BUFFER); + const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; + glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); + views.push_back({ + .offset = offset, + .size = size, + .format = format, + .texture = std::move(texture), + }); + return views.back().texture.handle; +} + BufferCacheRuntime::BufferCacheRuntime(const Device& device_) : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, @@ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, static_cast(size)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast(&ssbo)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; + const GLuint base_binding = graphics_base_storage_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast(offset), static_cast(size)); } +void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + PixelFormat format) { + *texture_handles++ = buffer.View(offset, size, format); +} + +void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { + *image_handles++ = buffer.View(offset, size, format); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index ddcce5e97..4986c65fd 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -32,6 +32,8 @@ public: void MakeResident(GLenum access) noexcept; + [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { return address; } @@ -41,9 +43,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + OGLTexture texture; + }; + GLuint64EXT address = 0; OGLBuffer buffer; GLenum current_residency_access = GL_NONE; + std::vector views; }; class BufferCacheRuntime { @@ -75,13 +85,19 @@ public: void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + + void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { if (use_assembly_shaders) { const GLuint handle = fast_uniforms[stage][binding_index].handle; const GLsizeiptr gl_size = static_cast(size); glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, fast_uniforms[stage][binding_index].handle, 0, @@ -103,7 +119,7 @@ public: std::span BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { const auto [mapped_span, offset] = stream_buffer->Request(static_cast(size)); - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), static_cast(offset), static_cast(size)); @@ -118,6 +134,19 @@ public: return has_fast_buffer_sub_data; } + void SetBaseUniformBindings(const std::array& bindings) { + graphics_base_uniform_bindings = bindings; + } + + void SetBaseStorageBindings(const std::array& bindings) { + graphics_base_storage_bindings = bindings; + } + + void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) { + texture_handles = texture_handles_; + image_handles = image_handles_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -133,6 +162,11 @@ private: u32 max_attributes = 0; + std::array graphics_base_uniform_bindings{}; + std::array graphics_base_storage_bindings{}; + GLuint* texture_handles = nullptr; + GLuint* image_handles = nullptr; + std::optional stream_buffer; std::array, @@ -155,8 +189,8 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; }; using BufferCache = VideoCommon::BufferCache; diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp new file mode 100644 index 000000000..d5ef65439 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -0,0 +1,178 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 16; + +size_t ComputeProgramKey::Hash() const noexcept { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof *this)); +} + +bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; +} + +ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + kepler_compute{kepler_compute_}, + program_manager{program_manager_}, program{std::move(program_)}, info{info_} { + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers += desc.count; + } + u32 num_textures = num_texture_buffers; + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + ASSERT(num_textures <= MAX_TEXTURES); + + u32 num_images = num_image_buffers; + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + ASSERT(num_images <= MAX_IMAGES); +} + +void ComputeProgram::Configure() { + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); + ++ssbo_index; + } + texture_cache.SynchronizeComputeDescriptors(); + + std::array image_view_ids; + boost::container::static_vector image_view_indices; + std::array samplers; + std::array textures; + std::array images; + GLsizei sampler_binding{}; + GLsizei texture_binding{}; + GLsizei image_binding{}; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + samplers[sampler_binding++] = 0; + } + } + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers[sampler_binding++] = sampler->Handle(); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + buffer_cache.UnbindComputeTextureBuffers(); + size_t texbuf_index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texbuf_index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + + buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); + buffer_cache.BindHostComputeBuffers(); + + const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + texture_binding += num_texture_buffers; + image_binding += num_image_buffers; + + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h new file mode 100644 index 000000000..64a75d44d --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.h @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class KeplerCompute; +} + +namespace Shader { +struct Info; +} + +namespace OpenGL { + +class ProgramManager; + +struct ComputeProgramKey { + u64 unique_hash; + u32 shared_memory_size; + std::array workgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputeProgramKey&) const noexcept; + + bool operator!=(const ComputeProgramKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class ComputeProgram { +public: + explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_); + + void Configure(); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::KeplerCompute& kepler_compute; + ProgramManager& program_manager; + + OGLProgram program; + Shader::Info info; + + u32 num_texture_buffers{}; + u32 num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3b00614e7..18bbc4c1f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -22,34 +22,11 @@ namespace OpenGL { namespace { -// One uniform block is reserved for emulation purposes -constexpr u32 ReservedUniformBlocks = 1; - -constexpr u32 NumStages = 5; - constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, }; -constexpr std::array LIMIT_SSBOS = { - GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, - GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, -}; -constexpr std::array LIMIT_SAMPLERS = { - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, -}; -constexpr std::array LIMIT_IMAGES = { - GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, - GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, -}; template T GetInteger(GLenum pname) { @@ -82,15 +59,6 @@ bool HasExtension(std::span extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { - ASSERT(num >= amount); - if (limit) { - amount = std::min(amount, GetInteger(*limit)); - } - num -= amount; - return std::exchange(base, base + amount); -} - std::array BuildMaxUniformBuffers() noexcept { std::array max; std::ranges::transform(LIMIT_UBOS, max.begin(), @@ -98,62 +66,6 @@ std::array BuildMaxUniformBuffers() noexcep return max; } -std::array BuildBaseBindings() noexcept { - std::array bindings; - - static constexpr std::array stage_swizzle{0, 1, 2, 3, 4}; - const u32 total_ubos = GetInteger(GL_MAX_UNIFORM_BUFFER_BINDINGS); - const u32 total_ssbos = GetInteger(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); - const u32 total_samplers = GetInteger(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); - - u32 num_ubos = total_ubos - ReservedUniformBlocks; - u32 num_ssbos = total_ssbos; - u32 num_samplers = total_samplers; - - u32 base_ubo = ReservedUniformBlocks; - u32 base_ssbo = 0; - u32 base_samplers = 0; - - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), - Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, - LIMIT_SAMPLERS[stage])}; - } - - u32 num_images = GetInteger(GL_MAX_IMAGE_UNITS); - u32 base_images = 0; - - // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. - // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the - // fragment stage, and at least 1 for the rest of the stages. - // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. - - // Reserve at least 4 image bindings on the fragment stage. - bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); - - // This is guaranteed to be at least 1. - const u32 total_extracted_images = num_images / (NumStages - 1); - - // Reserve the other image bindings. - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - if (stage == 4) { - continue; - } - bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); - } - - // Compute doesn't care about any of this. - bindings[5] = {0, 0, 0, 0}; - - return bindings; -} - bool IsASTCSupported() { static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; static constexpr std::array formats = { @@ -225,7 +137,6 @@ Device::Device() { } max_uniform_buffers = BuildMaxUniformBuffers(); - base_bindings = BuildBaseBindings(); uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2c2b13767..152a3acd3 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -12,13 +12,6 @@ namespace OpenGL { class Device { public: - struct BaseBindings { - u32 uniform_buffer{}; - u32 shader_storage_buffer{}; - u32 sampler{}; - u32 image{}; - }; - explicit Device(); explicit Device(std::nullptr_t); @@ -28,14 +21,6 @@ public: return max_uniform_buffers[static_cast(shader_type)]; } - const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { - return base_bindings[stage_index]; - } - - const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { - return GetBaseBindings(static_cast(shader_type)); - } - size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } @@ -134,7 +119,6 @@ private: std::string vendor_name; std::array max_uniform_buffers{}; - std::array base_bindings{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp new file mode 100644 index 000000000..fd0958719 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -0,0 +1,296 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 8; + +size_t GraphicsProgramKey::Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); +} + +bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; +} + +GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + const std::array& infos) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, program{std::move(program_)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + u32 num_textures{}; + u32 num_images{}; + for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { + const auto& info{stage_infos[stage]}; + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + for (const auto& desc : info.constant_buffer_descriptors) { + base_uniform_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.storage_buffers_descriptors) { + base_storage_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers[stage] += desc.count; + num_textures += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers[stage] += desc.count; + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); +} + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +void GraphicsProgram::Configure(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_view_index{}; + GLsizei sampler_binding{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); + buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + samplers[sampler_binding++] = 0; + } + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_binding++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_view_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); + + const ImageId* views_it{image_view_ids.data()}; + GLsizei texture_binding = 0; + GLsizei image_binding = 0; + std::array textures; + std::array images; + const auto prepare_stage{[&](size_t stage) { + buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); + buffer_cache.BindHostStageBuffers(stage); + + texture_binding += num_texture_buffers[stage]; + image_binding += num_image_buffers[stage]; + + const auto& info{stage_infos[stage]}; + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + texture_cache.UpdateRenderTargets(false); + + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h new file mode 100644 index 000000000..5adf3f41e --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace OpenGL { + +class ProgramManager; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +struct GraphicsProgramKey { + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + }; + + std::array unique_hashes; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; + BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + std::array padding; + TransformFeedbackState xfb_state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsProgramKey&) const noexcept; + + bool operator!=(const GraphicsProgramKey& rhs) const noexcept { + return !operator==(rhs); + } + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsProgramKey); + } else { + return offsetof(GraphicsProgramKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsProgram { +public: + explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, const std::array& infos); + + void Configure(bool is_indexed); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + ProgramManager& program_manager; + StateTracker& state_tracker; + + OGLProgram program; + std::array stage_infos{}; + std::array base_uniform_bindings{}; + std::array base_storage_bindings{}; + std::array num_texture_buffers{}; + std::array num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dd1937863..e527b76ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), + shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, + buffer_cache, program_manager, state_tracker), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} @@ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - // Setup shaders and their used resources. - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; - texture_cache.UpdateRenderTargets(false); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindGraphicsPipeline(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + program->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(primitive_mode); @@ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { num_instances, base_instance); } } - EndTransformFeedback(); ++num_queued_commands; @@ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; + if (!program) { + return; + } + program->Configure(); + const auto& qmd{kepler_compute.launch_description}; + glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); + ++num_queued_commands; } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); - screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c3e490b40..c9ca1f005 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -16,6 +16,11 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" @@ -25,17 +30,281 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" namespace OpenGL { +namespace { +// FIXME: Move this somewhere else +const Shader::Profile profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = true, + .support_viewport_mask = true, + .support_typeless_image_loads = true, + .support_demote_to_helper_invocation = false, + .warp_size_potentially_larger_than_guest = true, + .support_int64_atomics = false, + .lower_left_origin_mode = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + + .generic_input_types = {}, + .convert_depth_mode = false, + .force_early_z = false, + + .tess_primitive = {}, + .tess_spacing = {}, + .tess_clockwise = false, + + .input_topology = Shader::InputTopology::Triangles, + + .fixed_state_point_size = std::nullopt, + + .alpha_test_func = Shader::CompareFunction::Always, + .alpha_test_reference = 0.0f, + + .y_negate = false, + + .xfb_varyings = {}, +}; + +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::GraphicsEnvironment; + +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} + +void AddShader(GLenum stage, GLuint program, std::span code) { + OGLShader shader; + shader.handle = glCreateShader(stage); + + glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + glAttachShader(program, shader.handle); + if (!Settings::values.renderer_debug) { + return; + } + GLint shader_status{}; + glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +void LinkProgram(GLuint program) { + glLinkProgram(program); + if (!Settings::values.renderer_debug) { + return; + } + GLint link_status{}; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); + + GLint log_length{}; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} +} // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, - emu_window{emu_window_}, gpu{gpu_}, device{device_} {} + emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ + state_tracker_} {} ShaderCache::~ShaderCache() = default; +GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { + if (!RefreshStages(graphics_key.unique_hashes)) { + return nullptr; + } + const auto& regs{maxwell3d.regs}; + graphics_key.raw = 0; + graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 + ? regs.draw.topology.Value() + : Maxwell::PrimitiveTopology{}); + graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); + graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); + graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& program{pair->second}; + if (is_new) { + program = CreateGraphicsProgram(); + } + return program.get(); +} + +ComputeProgram* ShaderCache::CurrentComputeProgram() { + const VideoCommon::ShaderInfo* const shader{ComputeShader()}; + if (!shader) { + return nullptr; + } + const auto& qmd{kepler_compute.launch_description}; + const ComputeProgramKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return pipeline.get(); + } + pipeline = CreateComputeProgram(key, shader); + return pipeline.get(); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); + + main_pools.ReleaseContents(); + return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, std::span envs, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + size_t env_index{0}; + std::array programs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + } + std::array infos{}; + + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + + Shader::Backend::SPIRV::Bindings binding; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + Shader::IR::Program& program{programs[index]}; + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + + const std::vector code{EmitSPIRV(profile, program, binding)}; + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + AddShader(Stage(stage_index), gl_program.handle, code); + } + LinkProgram(gl_program.handle); + + return std::make_unique(texture_cache, buffer_cache, gpu_memory, maxwell3d, + program_manager, state_tracker, std::move(gl_program), + infos); +} + +std::unique_ptr ShaderCache::CreateComputeProgram( + const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + + main_pools.ReleaseContents(); + return CreateComputeProgram(main_pools, key, env, true); +} + +std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + Shader::Environment& env, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + Shader::Backend::SPIRV::Bindings binding; + const std::vector code{EmitSPIRV(profile, program, binding)}; + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); + LinkProgram(gl_program.handle); + return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, + program_manager, std::move(gl_program), program.info); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 96520e17c..b479d073a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,20 +5,18 @@ #pragma once #include -#include -#include -#include -#include -#include #include -#include -#include #include #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -32,64 +30,62 @@ class EmuWindow; namespace OpenGL { class Device; +class ProgramManager; class RasterizerOpenGL; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsProgramKey { - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - }; - - std::array unique_hashes; - std::array color_formats; - union { - u32 raw; - BitField<0, 1, u32> xfb_enabled; - BitField<1, 1, u32> early_z; - BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; - BitField<6, 2, u32> tessellation_primitive; - BitField<8, 2, u32> tessellation_spacing; - BitField<10, 1, u32> tessellation_clockwise; - }; - u32 padding; - TransformFeedbackState xfb_state; - - [[nodiscard]] size_t Size() const noexcept { - if (xfb_enabled != 0) { - return sizeof(GraphicsProgramKey); - } else { - return offsetof(GraphicsProgramKey, padding); - } +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); -class GraphicsProgram { -public: -private: + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; }; class ShaderCache : public VideoCommon::ShaderCache { public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); + [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); + + [[nodiscard]] ComputeProgram* CurrentComputeProgram(); + private: + std::unique_ptr CreateGraphicsProgram(); + + std::unique_ptr CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, + std::span envs, bool build_in_parallel); + + std::unique_ptr CreateComputeProgram(const ComputeProgramKey& key, + const VideoCommon::ShaderInfo* shader); + + std::unique_ptr CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + Shader::Environment& env, + bool build_in_parallel); + Core::Frontend::EmuWindow& emu_window; - Tegra::GPU& gpu; const Device& device; + TextureCache& texture_cache; + BufferCache& buffer_cache; + ProgramManager& program_manager; + StateTracker& state_tracker; + + GraphicsProgramKey graphics_key{}; + + ShaderPools main_pools; + std::unordered_map> graphics_cache; + std::unordered_map> compute_cache; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 553e6e8d6..399959afb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,149 +1,3 @@ // Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. - -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" - -namespace OpenGL { - -namespace { - -void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); -} - -} // Anonymous namespace - -ProgramManager::ProgramManager(const Device& device) - : use_assembly_programs{device.UseAssemblyShaders()} { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } else { - graphics_pipeline.Create(); - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -ProgramManager::~ProgramManager() = default; - -void ProgramManager::BindCompute(GLuint program) { - if (use_assembly_programs) { - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } else { - is_graphics_bound = false; - glUseProgram(program); - } -} - -void ProgramManager::BindGraphicsPipeline() { - if (!use_assembly_programs) { - UpdateSourcePrograms(); - } -} - -void ProgramManager::BindHostPipeline(GLuint pipeline) { - if (use_assembly_programs) { - if (geometry_enabled) { - geometry_enabled = false; - old_state.geometry = 0; - glDisable(GL_GEOMETRY_PROGRAM_NV); - } - } else { - if (!is_graphics_bound) { - glUseProgram(0); - } - } - glBindProgramPipeline(pipeline); -} - -void ProgramManager::RestoreGuestPipeline() { - if (use_assembly_programs) { - glBindProgramPipeline(0); - } else { - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -void ProgramManager::BindHostCompute(GLuint program) { - if (use_assembly_programs) { - glDisable(GL_COMPUTE_PROGRAM_NV); - } - glUseProgram(program); - is_graphics_bound = false; -} - -void ProgramManager::RestoreGuestCompute() { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - glUseProgram(0); - } -} - -void ProgramManager::UseVertexShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); - } - current_state.vertex = program; -} - -void ProgramManager::UseGeometryShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); - } - current_state.geometry = program; -} - -void ProgramManager::UseFragmentShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); - } - current_state.fragment = program; -} - -void ProgramManager::UpdateSourcePrograms() { - if (!is_graphics_bound) { - is_graphics_bound = true; - glUseProgram(0); - } - - const GLuint handle = graphics_pipeline.handle; - const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { - if (current == old) { - return; - } - glUseProgramStages(handle, stage, current); - }; - update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); - update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); - update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); - - old_state = current_state; -} - -void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { - const auto& regs = maxwell.regs; - - // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. - y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index ad42cce74..70781d6f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,79 +4,24 @@ #pragma once -#include - #include -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" - namespace OpenGL { -class Device; - -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -/// Not following that rule will cause problems on some AMD drivers. -struct alignas(16) MaxwellUniformData { - void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); - - GLfloat y_direction; -}; -static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); -static_assert(sizeof(MaxwellUniformData) < 16384, - "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); - class ProgramManager { public: - explicit ProgramManager(const Device& device); - ~ProgramManager(); - - /// Binds a compute program - void BindCompute(GLuint program); - - /// Updates bound programs. - void BindGraphicsPipeline(); - - /// Binds an OpenGL pipeline object unsynchronized with the guest state. - void BindHostPipeline(GLuint pipeline); + void BindProgram(GLuint program) { + if (bound_program == program) { + return; + } + bound_program = program; + glUseProgram(program); + } - /// Rewinds BindHostPipeline state changes. - void RestoreGuestPipeline(); - - /// Binds an OpenGL GLSL program object unsynchronized with the guest state. - void BindHostCompute(GLuint program); - - /// Rewinds BindHostCompute state changes. - void RestoreGuestCompute(); - - void UseVertexShader(GLuint program); - void UseGeometryShader(GLuint program); - void UseFragmentShader(GLuint program); + void RestoreGuestCompute() {} private: - struct PipelineState { - GLuint vertex = 0; - GLuint geometry = 0; - GLuint fragment = 0; - }; - - /// Update GLSL programs. - void UpdateSourcePrograms(); - - OGLPipeline graphics_pipeline; - - PipelineState current_state; - PipelineState old_state; - - bool use_assembly_programs = false; - - bool is_graphics_bound = true; - - bool vertex_enabled = false; - bool geometry_enabled = false; - bool fragment_enabled = false; + GLuint bound_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a8bf84218..7053be161 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -24,9 +24,7 @@ #include "video_core/textures/decoders.h" namespace OpenGL { - namespace { - using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; using Tegra::Texture::TextureType; @@ -59,107 +57,6 @@ struct CopyRegion { GLsizei depth; }; -struct FormatTuple { - GLenum internal_format; - GLenum format = GL_NONE; - GLenum type = GL_NONE; -}; - -constexpr std::array FORMAT_TABLE = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, - GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT -}}; - constexpr std::array ACCELERATED_FORMATS{ GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, @@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{ GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, }; -const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); - return FORMAT_TABLE[static_cast(pixel_format)]; -} - GLenum ImageTarget(const VideoCommon::ImageInfo& info) { switch (info.type) { case ImageType::e1D: @@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) { return GL_NONE; } -GLenum ImageTarget(ImageViewType type, int num_samples = 1) { +GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { const bool is_multisampled = num_samples > 1; switch (type) { - case ImageViewType::e1D: + case Shader::TextureType::Color1D: return GL_TEXTURE_1D; - case ImageViewType::e2D: + case Shader::TextureType::Color2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; - case ImageViewType::Cube: + case Shader::TextureType::ColorCube: return GL_TEXTURE_CUBE_MAP; - case ImageViewType::e3D: + case Shader::TextureType::Color3D: return GL_TEXTURE_3D; - case ImageViewType::e1DArray: + case Shader::TextureType::ColorArray1D: return GL_TEXTURE_1D_ARRAY; - case ImageViewType::e2DArray: + case Shader::TextureType::ColorArray2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; - case ImageViewType::CubeArray: + case Shader::TextureType::ColorArrayCube: return GL_TEXTURE_CUBE_MAP_ARRAY; - case ImageViewType::Rect: - return GL_TEXTURE_RECTANGLE; - case ImageViewType::Buffer: + case Shader::TextureType::Buffer: return GL_TEXTURE_BUFFER; } UNREACHABLE_MSG("Invalid image view type={}", type); @@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::arrayflags & VideoCommon::ImageViewFlagBits::Slice)) { - const GLuint texture = image_view->DefaultHandle(); - glNamedFramebufferTexture(fbo, attachment, texture, 0); + glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0); return; } - const GLuint texture = image_view->Handle(ImageViewType::e3D); + const GLuint texture = image_view->Handle(Shader::TextureType::Color3D); if (image_view->range.extent.layers > 1) { // TODO: OpenGL doesn't support rendering to a fixed number of slices glNamedFramebufferTexture(fbo, attachment, texture, 0); @@ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; - for (const FormatTuple& tuple : FORMAT_TABLE) { + for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) { const GLenum format = tuple.internal_format; GLint compat_class; GLint compat_type; @@ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); null_image_3d.Create(GL_TEXTURE_3D); - null_image_rect.Create(GL_TEXTURE_RECTANGLE); glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); - glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); std::array new_handles; glGenTextures(static_cast(new_handles.size()), new_handles.data()); @@ -496,29 +383,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, GL_R8, 0, 1, 0, 6); const std::array texture_handles{ - null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, - null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, - null_image_view_2d_array.handle, null_image_view_cube.handle, + null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle, + null_image_view_cube.handle, }; for (const GLuint handle : texture_handles) { static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); } - const auto set_view = [this](ImageViewType type, GLuint handle) { + const auto set_view = [this](Shader::TextureType type, GLuint handle) { if (device.HasDebuggingToolAttached()) { const std::string name = fmt::format("NullImage {}", type); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } null_image_views[static_cast(type)] = handle; }; - set_view(ImageViewType::e1D, null_image_view_1d.handle); - set_view(ImageViewType::e2D, null_image_view_2d.handle); - set_view(ImageViewType::Cube, null_image_view_cube.handle); - set_view(ImageViewType::e3D, null_image_3d.handle); - set_view(ImageViewType::e1DArray, null_image_1d_array.handle); - set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); - set_view(ImageViewType::CubeArray, null_image_cube_array.handle); - set_view(ImageViewType::Rect, null_image_rect.handle); + set_view(Shader::TextureType::Color1D, null_image_view_1d.handle); + set_view(Shader::TextureType::Color2D, null_image_view_2d.handle); + set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle); + set_view(Shader::TextureType::Color3D, null_image_3d.handle); + set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); + set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); + set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); } TextureCacheRuntime::~TextureCacheRuntime() = default; @@ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; } else { - const auto& tuple = GetFormatTuple(info.format); + const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); gl_internal_format = tuple.internal_format; gl_format = tuple.format; gl_type = tuple.type; @@ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); break; case GL_TEXTURE_BUFFER: - buffer.Create(); - glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); + UNREACHABLE(); break; default: UNREACHABLE_MSG("Invalid target=0x{:x}", target); @@ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map, } } -void Image::UploadMemory(const ImageBufferMap& map, - std::span copies) { - for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, - copy.dst_offset, copy.size); - } -} - void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API @@ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI if (True(image.flags & ImageFlagBits::Converted)) { internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; } else { - internal_format = GetFormatTuple(format).internal_format; + internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; } VideoCommon::SubresourceRange flatten_range = info.range; std::array handles; @@ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI [[fallthrough]]; case ImageViewType::e1D: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); break; case ImageViewType::e2DArray: flatten_range.extent.layers = 1; @@ -985,37 +862,65 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .extent = {.levels = 1, .layers = 1}, }; glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); - break; + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); + } else { + glGenTextures(2, handles.data()); + SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, + info.range); } - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); break; case ImageViewType::e3D: glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); break; case ImageViewType::CubeArray: flatten_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); break; case ImageViewType::Rect: - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + UNIMPLEMENTED(); break; case ImageViewType::Buffer: - glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); - SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + UNREACHABLE(); + break; + } + switch (info.type) { + case ImageViewType::e1D: + default_handle = Handle(Shader::TextureType::Color1D); + break; + case ImageViewType::e1DArray: + default_handle = Handle(Shader::TextureType::ColorArray1D); + break; + case ImageViewType::e2D: + default_handle = Handle(Shader::TextureType::Color2D); + break; + case ImageViewType::e2DArray: + default_handle = Handle(Shader::TextureType::ColorArray2D); + break; + case ImageViewType::e3D: + default_handle = Handle(Shader::TextureType::Color3D); + break; + case ImageViewType::Cube: + default_handle = Handle(Shader::TextureType::ColorCube); + break; + case ImageViewType::CubeArray: + default_handle = Handle(Shader::TextureType::ColorArrayCube); + break; + default: break; } - default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info) : VideoCommon::ImageViewBase{info, view_info} {} @@ -1023,24 +928,18 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} -void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, +void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range) { - if (info.type == ImageViewType::Buffer) { - // TODO: Take offset from buffer cache - glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, - image.guest_size_bytes); - } else { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, - view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if (!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); - } + const GLuint parent = image.texture.handle; + const GLenum target = ImageTarget(view_type, image.info.num_samples); + glTextureView(handle, target, parent, internal_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!info.IsRenderTarget()) { + ApplySwizzle(handle, format, info.Swizzle()); } if (device.HasDebuggingToolAttached()) { - const std::string name = VideoCommon::Name(*this, view_type); + const std::string name = VideoCommon::Name(*this); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } stored_views.emplace_back().handle = handle; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 817b0e650..2e3e02b79 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -9,6 +9,7 @@ #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" @@ -127,13 +128,12 @@ private: OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; - OGLTexture null_image_rect; OGLTextureView null_image_view_1d; OGLTextureView null_image_view_2d; OGLTextureView null_image_view_2d_array; OGLTextureView null_image_view_cube; - std::array null_image_views; + std::array null_image_views{}; }; class Image : public VideoCommon::ImageBase { @@ -154,8 +154,6 @@ public: void UploadMemory(const ImageBufferMap& map, std::span copies); - void UploadMemory(const ImageBufferMap& map, std::span copies); - void DownloadMemory(ImageBufferMap& map, std::span copies); GLuint StorageHandle() noexcept; @@ -170,7 +168,6 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); OGLTexture texture; - OGLBuffer buffer; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; @@ -182,12 +179,14 @@ class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); - [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { - return views[static_cast(query_type)]; + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { + return views[static_cast(handle_type)]; } [[nodiscard]] GLuint DefaultHandle() const noexcept { @@ -198,15 +197,25 @@ public: return internal_format; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: - void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, + void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range); - std::array views{}; + std::array views{}; std::vector stored_views; - GLuint default_handle = 0; GLenum internal_format = GL_NONE; + GLuint default_handle = 0; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index f7ad8f370..672f94bfc 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -5,12 +5,120 @@ #pragma once #include + #include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" namespace OpenGL::MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct FormatTuple { + GLenum internal_format; + GLenum format = GL_NONE; + GLenum type = GL_NONE; +}; + +constexpr std::array FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT +}}; + +inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) { + ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast(pixel_format)]; +} + inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { case Maxwell::VertexAttribute::Type::UnsignedNorm: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c12929de6..4e77ef808 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, std::unique_ptr context_) : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, - program_manager{device}, rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); @@ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() { OGLShader fragment_shader; fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - vertex_program.Create(true, false, vertex_shader.handle); - fragment_program.Create(true, false, fragment_shader.handle); - - pipeline.Create(); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); // Generate presentation sampler present_sampler.Create(); @@ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, - std::data(ortho_matrix)); + program_manager.BindProgram(present_program.handle); + glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; @@ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyClipControl(); state_tracker.NotifyAlphaTest(); - program_manager.BindHostPipeline(pipeline.handle); - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { @@ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - program_manager.RestoreGuestPipeline(); + // TODO + // program_manager.RestoreGuestPipeline(); } void RendererOpenGL::RenderScreenshot() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 0b66f8332..b3ee55665 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -12,7 +12,6 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" namespace Core { @@ -111,9 +110,7 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram vertex_program; - OGLProgram fragment_program; - OGLPipeline pipeline; + OGLProgram present_program; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8fb5be393..51e72b705 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -16,7 +16,6 @@ #include "video_core/host_shaders/opengl_copy_bc4_comp.h" #include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" @@ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindHostCompute(astc_decoder_program.handle); + program_manager.BindProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two images are not implemented"); - program_manager.BindHostCompute(pitch_unswizzle_program.handle); + program_manager.BindProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index feaace0c5..168ffa7e9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,6 +18,9 @@ namespace Vulkan { +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; + ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, @@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, secondary_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - return TextureHandle{lhs_raw | rhs_raw, via_header_index}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers.push_back(sampler->Handle()); } } @@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++texture_buffer_ids; ++index; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9f5d30fe8..e5f54a84f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,7 +19,7 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" -#ifdef _MSC_VER +#if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] #else #define LAMBDA_FORCEINLINE @@ -30,6 +30,7 @@ namespace { using boost::container::small_vector; using boost::container::static_vector; using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; @@ -289,15 +290,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TexturePair(raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; } }}; if constexpr (Spec::has_texture_buffers) { @@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_index++] = sampler->Handle(); } } @@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++index; ++texture_buffer_index; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1334882b5..30b71bdbc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -342,28 +342,15 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { - main_pools.ReleaseContents(); - - std::array graphics_envs; - boost::container::static_vector envs; + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == 0) { - continue; - } - const auto program{static_cast(index)}; - auto& env{graphics_envs[index]}; - const u32 start_address{maxwell3d.regs.shader_config[index].offset}; - env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; - env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; + main_pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; if (pipeline_cache_filename.empty()) { return pipeline; } - serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { boost::container::static_vector env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0f15ad2f7..ef14e91e7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index b8b8eace5..78bf90c48 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() { return MakeShaderInfo(env, *cpu_shader_addr); } +void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array& unique_hashes) { + size_t env_index{}; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < NUM_PROGRAMS; ++index) { + if (unique_hashes[index] == 0) { + continue; + } + const auto program{static_cast(index)}; + auto& env{result.envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + env.SetCachedSize(shader_infos[index]->size_bytes); + result.env_ptrs[env_index++] = &env; + } +} + ShaderInfo* ShaderCache::TryGet(VAddr addr) const { std::scoped_lock lock{lookup_mutex}; diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 89a4bcc84..136fe294c 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -4,14 +4,18 @@ #pragma once +#include +#include #include #include +#include #include #include #include #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +#include "video_core/shader_environment.h" namespace Tegra { class MemoryManager; @@ -30,6 +34,8 @@ class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; + static constexpr size_t NUM_PROGRAMS = 6; + struct Entry { VAddr addr_start; VAddr addr_end; @@ -58,6 +64,15 @@ public: void SyncGuestHost(); protected: + struct GraphicsEnvironments { + std::array envs; + std::array env_ptrs; + + std::span Span() const noexcept { + return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); + } + }; + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_); @@ -65,17 +80,21 @@ protected: /// @brief Update the hashes and information of shader stages /// @param unique_hashes Shader hashes to store into when a stage is enabled /// @return True no success, false on error - bool RefreshStages(std::array& unique_hashes); + bool RefreshStages(std::array& unique_hashes); /// @brief Returns information about the current compute shader /// @return Pointer to a valid shader, nullptr on error const ShaderInfo* ComputeShader(); + /// @brief Collect the current graphics environments + void GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array& unique_hashes); + Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; - std::array shader_infos{}; + std::array shader_infos{}; bool last_shaders_valid = false; private: diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 5dccc0097..c93174519 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -187,8 +187,8 @@ std::optional GenericEnvironment::TryFindSize() { Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, u32 raw) { - const TextureHandle handle{raw, via_header_index}; - const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; + const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); const Shader::TextureType result{ConvertType(entry)}; diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 37d712045..d26dbfaab 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -29,22 +29,6 @@ class Memorymanager; namespace VideoCommon { -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - if (via_header_index) { - image = data; - sampler = data; - } else { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - } - - u32 image; - u32 sampler; -}; - class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index d10ba4ccd..249cc4d0f 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) { return "Invalid"; } -std::string Name(const ImageViewBase& image_view, std::optional type) { +std::string Name(const ImageViewBase& image_view) { const u32 width = image_view.size.width; const u32 height = image_view.size.height; const u32 depth = image_view.size.depth; @@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional t const u32 num_layers = image_view.range.extent.layers; const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; - switch (type.value_or(image_view.type)) { + switch (image_view.type) { case ImageViewType::e1D: return fmt::format("ImageView 1D {}{}", width, level); case ImageViewType::e2D: diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index a48413983..c6cf0583f 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -255,8 +255,7 @@ struct RenderTargets; [[nodiscard]] std::string Name(const ImageBase& image); -[[nodiscard]] std::string Name(const ImageViewBase& image_view, - std::optional type = std::nullopt); +[[nodiscard]] std::string Name(const ImageViewBase& image_view); [[nodiscard]] std::string Name(const RenderTargets& render_targets); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c1d14335e..1a9399455 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -154,6 +154,15 @@ union TextureHandle { }; static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); +[[nodiscard]] inline std::pair TexturePair(u32 raw, bool via_header_index) { + if (via_header_index) { + return {raw, raw}; + } else { + const Tegra::Texture::TextureHandle handle{raw}; + return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id}; + } +} + struct TICEntry { union { struct { diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2318c1bda..e27a2b51e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, .pNext = nullptr, - .storageBuffer16BitAccess = false, + .storageBuffer16BitAccess = true, .uniformAndStorageBuffer16BitAccess = true, .storagePushConstant16 = false, .storageInputOutput16 = false, -- cgit v1.2.3 From 99f2c31b64aa7854690368f4637ef59a546b2d15 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 May 2021 02:53:39 -0300 Subject: vulkan_device: Enable float64 and int64 conditionally Add Intel Xe support. --- src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++-- src/video_core/vulkan_common/vulkan_device.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e27a2b51e..aabcb0b10 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -251,8 +251,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderStorageImageArrayDynamicIndexing = false, .shaderClipDistance = true, .shaderCullDistance = true, - .shaderFloat64 = true, - .shaderInt64 = true, + .shaderFloat64 = is_shader_float64_supported, + .shaderInt64 = is_shader_int64_supported, .shaderInt16 = true, .shaderResourceResidency = false, .shaderResourceMinLod = false, @@ -909,6 +909,8 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { void Device::SetupFeatures() { const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_float64_supported = features.shaderFloat64; + is_shader_int64_supported = features.shaderInt64; is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(features); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index ebe073293..693419505 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -314,6 +314,8 @@ private: bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_shader_float64_supported{}; ///< Support for float64. + bool is_shader_int64_supported{}; ///< Support for int64. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. -- cgit v1.2.3 From 77372443c3d6b20d7f78366bb4aa162f22bd7cde Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:43:47 -0300 Subject: vulkan: Enable depth bounds and use it conditionally Intel devices pre-Xe don't support this. --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 +++++ src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index dfe6e6a80..d381109d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -598,13 +598,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthCompareOp = dynamic.depth_test_enable ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(), .stencilTestEnable = dynamic.stencil_enable, .front = GetStencilFaceState(dynamic.front), .back = GetStencilFaceState(dynamic.back), .minDepthBounds = 0.0f, .maxDepthBounds = 0.0f, }; + if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + } static_vector cb_attachments; const size_t num_attachments{NumAttachments(key.state)}; for (size_t index = 0; index < num_attachments; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ef14e91e7..9611b480a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -682,6 +682,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re if (!state_tracker.TouchDepthBoundsTestEnable()) { return; } + bool enabled = regs.depth_bounds_enable; + if (enabled && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + enabled = false; + } scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBoundsTestEnableEXT(enable); }); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index aabcb0b10..0a42efb6a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -226,7 +226,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .depthClamp = true, .depthBiasClamp = true, .fillModeNonSolid = true, - .depthBounds = false, + .depthBounds = is_depth_bounds_supported, .wideLines = false, .largePoints = true, .alphaToOne = false, @@ -908,6 +908,7 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { void Device::SetupFeatures() { const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_depth_bounds_supported = features.depthBounds; is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; is_shader_float64_supported = features.shaderFloat64; is_shader_int64_supported = features.shaderInt64; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 693419505..1ab63ecd7 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -159,6 +159,11 @@ public: return is_formatless_image_load_supported; } + // Returns true if depth bounds is supported. + bool IsDepthBoundsSupported() const { + return is_depth_bounds_supported; + } + /// Returns true when blitting from and to depth stencil images is supported. bool IsBlitDepthStencilSupported() const { return is_blit_depth_stencil_supported; @@ -314,6 +319,7 @@ private: bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_depth_bounds_supported{}; ///< Support for depth bounds. bool is_shader_float64_supported{}; ///< Support for float64. bool is_shader_int64_supported{}; ///< Support for int64. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. -- cgit v1.2.3 From 1148a4eac715869077ace56a9a311a167643aca3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:44:28 -0300 Subject: vulkan: Conditionally use shaderInt16 Add support for Polaris AMD devices. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index cec51cc77..2a2f166c8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -249,7 +249,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .unified_descriptor_binding = true, .support_descriptor_aliasing = true, .support_int8 = true, - .support_int16 = true, + .support_int16 = device.IsShaderInt16Supported(), .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0a42efb6a..2b715baba 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -253,7 +253,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderCullDistance = true, .shaderFloat64 = is_shader_float64_supported, .shaderInt64 = is_shader_int64_supported, - .shaderInt16 = true, + .shaderInt16 = is_shader_int16_supported, .shaderResourceResidency = false, .shaderResourceMinLod = false, .sparseBinding = false, @@ -912,6 +912,7 @@ void Device::SetupFeatures() { is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; is_shader_float64_supported = features.shaderFloat64; is_shader_int64_supported = features.shaderInt64; + is_shader_int16_supported = features.shaderInt16; is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(features); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 1ab63ecd7..9bc1fb947 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -159,6 +159,11 @@ public: return is_formatless_image_load_supported; } + /// Returns true if shader int16 is supported. + bool IsShaderInt16Supported() const { + return is_shader_int16_supported; + } + // Returns true if depth bounds is supported. bool IsDepthBoundsSupported() const { return is_depth_bounds_supported; @@ -322,6 +327,7 @@ private: bool is_depth_bounds_supported{}; ///< Support for depth bounds. bool is_shader_float64_supported{}; ///< Support for float64. bool is_shader_int64_supported{}; ///< Support for int64. + bool is_shader_int16_supported{}; ///< Support for int16. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. -- cgit v1.2.3 From 8f099af6a8ea691c5f8f1403848ca42077b34bd6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 04:28:59 -0300 Subject: nsight_aftermath_tracker: Fix SPIR-V module writes --- src/video_core/vulkan_common/nsight_aftermath_tracker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 209cb1e0a..fdd1a5081 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -99,7 +99,7 @@ void NsightAftermathTracker::SaveShader(std::span spirv) const { LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); return; } - if (file.Write(spirv) != spirv.size()) { + if (file.WriteSpan(spirv) != spirv.size()) { LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); return; } -- cgit v1.2.3 From d554778311c32e0a19ecdc13d7525b264d8443b5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 21:52:04 -0300 Subject: vulkan: Use VK_EXT_provoking_vertex when available --- src/video_core/engines/maxwell_3d.h | 7 +++++- .../renderer_vulkan/fixed_pipeline_state.cpp | 4 ++-- .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 10 +++++++- src/video_core/vulkan_common/vulkan_device.cpp | 28 ++++++++++++++++++++++ src/video_core/vulkan_common/vulkan_device.h | 6 +++++ 6 files changed, 52 insertions(+), 4 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cbf94412b..04d5790f6 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1151,7 +1151,11 @@ public: u32 index; } primitive_restart; - INSERT_PADDING_WORDS_NOINIT(0x5F); + INSERT_PADDING_WORDS_NOINIT(0xE); + + u32 provoking_vertex_last; + + INSERT_PADDING_WORDS_NOINIT(0x50); struct { u32 start_addr_high; @@ -1672,6 +1676,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(primitive_restart, 0x591); +ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 1486d088a..f121fbf0e 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -84,6 +84,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); depth_format.Assign(static_cast(regs.zeta.format)); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); @@ -91,8 +93,6 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 0f1eff9cd..60adae316 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -192,6 +192,7 @@ struct FixedPipelineState { BitField<4, 1, u32> depth_enabled; BitField<5, 5, u32> depth_format; BitField<10, 1, u32> y_negate; + BitField<11, 1, u32> provoking_vertex_last; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5c916c869..06a80c2ba 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -567,9 +567,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { viewport_ci.pNext = &swizzle_ci; } + const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; const VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), @@ -586,6 +593,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2b715baba..618535aae 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -412,6 +412,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + if (ext_provoking_vertex) { + provoking_vertex = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, + .pNext = nullptr, + .provokingVertexLast = VK_TRUE, + .transformFeedbackPreservesProvokingVertex = VK_TRUE, + }; + SetNext(next, provoking_vertex); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); + } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; if (ext_shader_atomic_int64) { atomic_int64 = { @@ -718,6 +731,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; bool has_ext_shader_atomic_int64{}; + bool has_ext_provoking_vertex{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -748,6 +762,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); @@ -799,6 +814,19 @@ std::vector Device::LoadExtensions(bool requires_surface) { } else { is_warp_potentially_bigger = true; } + if (has_ext_provoking_vertex) { + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT; + provoking_vertex.pNext = nullptr; + features.pNext = &provoking_vertex; + physical.GetFeatures2KHR(features); + + if (provoking_vertex.provokingVertexLast && + provoking_vertex.transformFeedbackPreservesProvokingVertex) { + extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + ext_provoking_vertex = true; + } + } if (has_ext_shader_atomic_int64) { VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 9bc1fb947..37f589612 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -244,6 +244,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_provoking_vertex. + bool IsExtProvokingVertexSupported() const { + return ext_provoking_vertex; + } + /// Returns true if the device supports VK_KHR_shader_atomic_int64. bool IsExtShaderAtomicInt64Supported() const { return ext_shader_atomic_int64; @@ -346,6 +351,7 @@ private: bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached -- cgit v1.2.3 From ea038d66538975319858f792052af1d0fa997fe3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 05:07:52 -0300 Subject: vulkan: Add VK_EXT_vertex_input_dynamic_state support Reduces the number of total pipelines generated on Vulkan. Tested on Super Smash Bros. Ultimate. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 67 ++++++++----- .../renderer_vulkan/fixed_pipeline_state.h | 73 ++++++++------ .../renderer_vulkan/vk_graphics_pipeline.cpp | 107 +++++++++++++-------- .../renderer_vulkan/vk_pipeline_cache.cpp | 29 +++++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 56 +++++++++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_state_tracker.cpp | 50 ++++++---- src/video_core/renderer_vulkan/vk_state_tracker.h | 8 +- src/video_core/vulkan_common/vulkan_device.h | 6 ++ src/video_core/vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 8 ++ 11 files changed, 291 insertions(+), 116 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index f121fbf0e..16cef8711 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -50,7 +50,7 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& } // Anonymous namespace void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, - bool has_extended_dynamic_state) { + bool has_extended_dynamic_state, bool has_dynamic_vertex_input) { const Maxwell& regs = maxwell3d.regs; const std::array enabled_lut{ regs.polygon_offset_point_enable, @@ -60,7 +60,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const u32 topology_index = static_cast(regs.draw.topology.Value()); raw1 = 0; - no_extended_dynamic_state.Assign(has_extended_dynamic_state ? 0 : 1); + extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); + dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0); xfb_enabled.Assign(regs.tfb_enabled != 0); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); @@ -73,11 +74,11 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); - rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); topology.Assign(regs.draw.topology); msaa_mode.Assign(regs.multisample_mode); raw2 = 0; + rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); const auto test_func = regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); @@ -93,24 +94,44 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { - maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); - binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; - } - } - if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { - maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& input = regs.vertex_attrib_format[index]; - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); - attribute.buffer.Assign(input.buffer); - attribute.offset.Assign(input.offset); - attribute.type.Assign(static_cast(input.type.Value())); - attribute.size.Assign(static_cast(input.size.Value())); + if (maxwell3d.dirty.flags[Dirty::VertexInput]) { + if (has_dynamic_vertex_input) { + // Dirty flag will be reset by the command buffer update + static constexpr std::array LUT{ + 0u, // Invalid + 1u, // SignedNorm + 1u, // UnsignedNorm + 2u, // SignedInt + 3u, // UnsignedInt + 1u, // UnsignedScaled + 1u, // SignedScaled + 1u, // Float + }; + const auto& attrs = regs.vertex_attrib_format; + attribute_types = 0; + for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { + const u32 mask = attrs[i].constant != 0 ? 0 : 3; + const u32 type = LUT[static_cast(attrs[i].type.Value())]; + attribute_types |= static_cast(type & mask) << (i * 2); + } + } else { + maxwell3d.dirty.flags[Dirty::VertexInput] = false; + enabled_divisors = 0; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); + binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; + enabled_divisors |= (is_enabled ? u64{1} : 0) << index; + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast(input.type.Value())); + attribute.size.Assign(static_cast(input.size.Value())); + } } } if (maxwell3d.dirty.flags[Dirty::Blending]) { @@ -126,10 +147,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast(viewport.swizzle.raw); }); } - if (no_extended_dynamic_state != 0) { + if (!extended_dynamic_state) { dynamic_state.Refresh(regs); } - if (xfb_enabled != 0) { + if (xfb_enabled) { RefreshXfbState(xfb_state, regs); } } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 60adae316..04f34eb97 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -168,44 +168,51 @@ struct FixedPipelineState { union { u32 raw1; - BitField<0, 1, u32> no_extended_dynamic_state; - BitField<1, 1, u32> xfb_enabled; - BitField<2, 1, u32> primitive_restart_enable; - BitField<3, 1, u32> depth_bias_enable; - BitField<4, 1, u32> depth_clamp_disabled; - BitField<5, 1, u32> ndc_minus_one_to_one; - BitField<6, 2, u32> polygon_mode; - BitField<8, 5, u32> patch_control_points_minus_one; - BitField<13, 2, u32> tessellation_primitive; - BitField<15, 2, u32> tessellation_spacing; - BitField<17, 1, u32> tessellation_clockwise; - BitField<18, 1, u32> logic_op_enable; - BitField<19, 4, u32> logic_op; - BitField<23, 1, u32> rasterize_enable; + BitField<0, 1, u32> extended_dynamic_state; + BitField<1, 1, u32> dynamic_vertex_input; + BitField<2, 1, u32> xfb_enabled; + BitField<3, 1, u32> primitive_restart_enable; + BitField<4, 1, u32> depth_bias_enable; + BitField<5, 1, u32> depth_clamp_disabled; + BitField<6, 1, u32> ndc_minus_one_to_one; + BitField<7, 2, u32> polygon_mode; + BitField<9, 5, u32> patch_control_points_minus_one; + BitField<14, 2, u32> tessellation_primitive; + BitField<16, 2, u32> tessellation_spacing; + BitField<18, 1, u32> tessellation_clockwise; + BitField<19, 1, u32> logic_op_enable; + BitField<20, 4, u32> logic_op; BitField<24, 4, Maxwell::PrimitiveTopology> topology; BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; }; union { u32 raw2; - BitField<0, 3, u32> alpha_test_func; - BitField<3, 1, u32> early_z; - BitField<4, 1, u32> depth_enabled; - BitField<5, 5, u32> depth_format; - BitField<10, 1, u32> y_negate; - BitField<11, 1, u32> provoking_vertex_last; + BitField<0, 1, u32> rasterize_enable; + BitField<1, 3, u32> alpha_test_func; + BitField<4, 1, u32> early_z; + BitField<5, 1, u32> depth_enabled; + BitField<6, 5, u32> depth_format; + BitField<11, 1, u32> y_negate; + BitField<12, 1, u32> provoking_vertex_last; }; std::array color_formats; u32 alpha_test_ref; u32 point_size; - std::array binding_divisors; - std::array attributes; std::array attachments; std::array viewport_swizzles; + union { + u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state + u64 enabled_divisors; + }; + std::array attributes; + std::array binding_divisors; + DynamicState dynamic_state; VideoCommon::TransformFeedbackState xfb_state; - void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); + void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state, + bool has_dynamic_vertex_input); size_t Hash() const noexcept; @@ -216,16 +223,24 @@ struct FixedPipelineState { } size_t Size() const noexcept { - if (xfb_enabled != 0) { + if (xfb_enabled) { // When transform feedback is enabled, use the whole struct return sizeof(*this); - } else if (no_extended_dynamic_state != 0) { - // Dynamic state is enabled, we can enable more - return offsetof(FixedPipelineState, xfb_state); - } else { - // No XFB, extended dynamic state enabled + } + if (dynamic_vertex_input) { + // Exclude dynamic state and attributes + return offsetof(FixedPipelineState, attributes); + } + if (extended_dynamic_state) { + // Exclude dynamic state return offsetof(FixedPipelineState, dynamic_state); } + // Default + return offsetof(FixedPipelineState, xfb_state); + } + + u32 DynamicAttributeType(size_t index) const noexcept { + return (attribute_types >> (index * 2)) & 0b11; } }; static_assert(std::has_unique_object_representations_v); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 06a80c2ba..ccef71f4c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -472,39 +472,65 @@ void GraphicsPipeline::ConfigureDraw() { void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; - if (!device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { dynamic = key.state.dynamic_state; } static_vector vertex_bindings; static_vector vertex_binding_divisors; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = key.state.binding_divisors[index] != 0; - const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ + static_vector vertex_attributes; + if (key.state.dynamic_vertex_input) { + const auto& input_attributes = stage_infos[0].input_generics; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const u32 type = key.state.DynamicAttributeType(index); + if (!input_attributes[index].used || type == 0) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = 0, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? VK_FORMAT_R32_SINT + : VK_FORMAT_R32_UINT, + .offset = 0, + }); + } + if (!vertex_attributes.empty()) { + vertex_bindings.push_back({ + .binding = 0, + .stride = 4, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }); + } + } else { + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = key.state.binding_divisors[index] != 0; + const auto rate = + instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ .binding = static_cast(index), - .divisor = key.state.binding_divisors[index], + .stride = dynamic.vertex_strides[index], + .inputRate = rate, }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast(index), + .divisor = key.state.binding_divisors[index], + }); + } } - } - static_vector vertex_attributes; - const auto& input_attributes = stage_infos[0].input_generics; - for (size_t index = 0; index < key.state.attributes.size(); ++index) { - const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { - continue; + const auto& input_attributes = stage_infos[0].input_generics; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; + if (!attribute.enabled || !input_attributes[index].used) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); } - vertex_attributes.push_back({ - .location = static_cast(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); } VkPipelineVertexInputStateCreateInfo vertex_input_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -545,27 +571,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .flags = 0, .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, - }; + std::array swizzles; std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, .flags = 0, .viewportCount = Maxwell::NumViewports, .pViewportSwizzles = swizzles.data(), }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } + const VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, @@ -660,13 +684,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - static_vector dynamic_states{ + static_vector dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, }; - if (device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { static constexpr std::array extended{ VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, @@ -678,6 +702,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_STENCIL_OP_EXT, }; + if (key.state.dynamic_vertex_input) { + dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT); + } dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); } const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6df4088a7..db7da5555 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -109,6 +109,20 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } +Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) { + switch (state.DynamicAttributeType(index)) { + case 0: + return Shader::AttributeType::Disabled; + case 1: + return Shader::AttributeType::Float; + case 2: + return Shader::AttributeType::SignedInt; + case 3: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Disabled; +} + Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::RuntimeInfo info; @@ -123,13 +137,19 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, if (key.state.topology == Maxwell::PrimitiveTopology::Points) { info.fixed_state_point_size = point_size; } - if (key.state.xfb_enabled != 0) { + if (key.state.xfb_enabled) { info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); } info.convert_depth_mode = gl_ndc; } - std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), - &CastAttributeType); + if (key.state.dynamic_vertex_input) { + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + info.generic_input_types[index] = AttributeType(key.state, index); + } + } else { + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + } break; case Shader::Stage::TessellationEval: // We have to flip tessellation clockwise for some reason... @@ -298,7 +318,8 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { current_pipeline = nullptr; return nullptr; } - graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), + device.IsExtVertexInputDynamicStateSupported()); if (current_pipeline) { GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e339e9739..855c17769 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -551,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateFrontFace(regs); UpdateStencilOp(regs); UpdateStencilTestEnable(regs); + if (device.IsExtVertexInputDynamicStateSupported()) { + UpdateVertexInput(regs); + } } } @@ -780,4 +783,57 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& }); } +void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[Dirty::VertexInput]) { + return; + } + dirty[Dirty::VertexInput] = false; + + boost::container::static_vector bindings; + boost::container::static_vector attributes; + + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexAttribute0 + index]) { + continue; + } + const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; + const u32 binding{attribute.buffer}; + dirty[Dirty::VertexAttribute0 + index] = false; + dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; + + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast(index), + .binding = binding, + .format = attribute.IsConstant() + ? VK_FORMAT_A8B8G8R8_UNORM_PACK32 + : MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexBinding0 + index]) { + continue; + } + dirty[Dirty::VertexBinding0 + index] = false; + + const u32 binding{static_cast(index)}; + const auto& input_binding{regs.vertex_array[binding]}; + const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)}; + bindings.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, + .pNext = nullptr, + .binding = binding, + .stride = input_binding.stride, + .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX, + .divisor = is_instanced ? input_binding.divisor : 1, + }); + } + scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) { + cmdbuf.SetVertexInputEXT(bindings, attributes); + }); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 1302bed02..c954fa7f8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -135,6 +135,8 @@ private: void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); + Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 956f86845..0ebe0473f 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, - StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, - DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, + Viewports, Scissors, DepthBias, BlendConstants, + DepthBounds, StencilProperties, CullMode, DepthBoundsEnable, + DepthTestEnable, DepthWriteEnable, DepthCompareOp, FrontFace, + StencilOp, StencilTestEnable, VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() { for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { flags[index] = true; } + for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) { + flags[index] = true; + } + for (int index = VertexBinding0; index <= VertexBinding31; ++index) { + flags[index] = true; + } return flags; } @@ -134,31 +141,38 @@ void SetupDirtyBlending(Tables& tables) { FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); } -void SetupDirtyInstanceDivisors(Tables& tables) { - static constexpr size_t divisor_offset = 3; - for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { - tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; - tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = - InstanceDivisors; +void SetupDirtyViewportSwizzles(Tables& tables) { + static constexpr size_t swizzle_offset = 6; + for (size_t index = 0; index < Regs::NumViewports; ++index) { + tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = + ViewportSwizzles; } } void SetupDirtyVertexAttributes(Tables& tables) { - FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); + for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) { + const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); + FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i); + } + FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput); } -void SetupDirtyViewportSwizzles(Tables& tables) { - static constexpr size_t swizzle_offset = 6; - for (size_t index = 0; index < Regs::NumViewports; ++index) { - tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = - ViewportSwizzles; +void SetupDirtyVertexBindings(Tables& tables) { + // Do NOT include stride here, it's implicit in VertexBuffer + static constexpr size_t divisor_offset = 3; + for (size_t i = 0; i < Regs::NumVertexArrays; ++i) { + const u8 flag = static_cast(VertexBinding0 + i); + tables[0][OFF(instanced_arrays) + i] = VertexInput; + tables[1][OFF(instanced_arrays) + i] = flag; + tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput; + tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag; } } } // Anonymous namespace StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { - auto& tables = gpu.Maxwell3D().dirty.tables; + auto& tables{gpu.Maxwell3D().dirty.tables}; SetupDirtyFlags(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); @@ -175,9 +189,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); SetupDirtyBlending(tables); - SetupDirtyInstanceDivisors(tables); - SetupDirtyVertexAttributes(tables); SetupDirtyViewportSwizzles(tables); + SetupDirtyVertexAttributes(tables); + SetupDirtyVertexBindings(tables); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 84e918a71..1976b7e9b 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -19,6 +19,12 @@ namespace Dirty { enum : u8 { First = VideoCommon::Dirty::LastCommonEntry, + VertexInput, + VertexAttribute0, + VertexAttribute31 = VertexAttribute0 + 31, + VertexBinding0, + VertexBinding31 = VertexBinding0 + 31, + Viewports, Scissors, DepthBias, @@ -36,8 +42,6 @@ enum : u8 { StencilTestEnable, Blending, - InstanceDivisors, - VertexAttributes, ViewportSwizzles, Last diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 37f589612..4fda472b0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -239,6 +239,11 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. + bool IsExtVertexInputDynamicStateSupported() const { + return ext_vertex_input_dynamic_state; + } + /// Returns true if the device supports VK_EXT_shader_stencil_export. bool IsExtShaderStencilExportSupported() const { return ext_shader_stencil_export; @@ -349,6 +354,7 @@ private: bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 33fb74bfb..7e13ae8af 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -123,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); + X(vkCmdSetVertexInputEXT); X(vkCmdResolveImage); X(vkCreateBuffer); X(vkCreateBufferView); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 3e36d356a..6e5be1186 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -238,6 +238,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; + PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; PFN_vkCmdResolveImage vkCmdResolveImage{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{}; @@ -1203,6 +1204,13 @@ public: dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); } + void SetVertexInputEXT( + vk::Span bindings, + vk::Span attributes) const noexcept { + dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(), + attributes.data()); + } + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, const VkDeviceSize* offsets, const VkDeviceSize* sizes) const noexcept { -- cgit v1.2.3 From ba3bdf1d4156fa6fd257305406a3b88f0c288006 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 23:25:40 -0300 Subject: vulkan_device: Enable VK_EXT_vertex_input_dynamic_state --- src/video_core/vulkan_common/vulkan_device.cpp | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 618535aae..8eb37a77a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -425,6 +425,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); } + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic; + if (ext_vertex_input_dynamic_state) { + vertex_input_dynamic = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT, + .pNext = nullptr, + .vertexInputDynamicState = VK_TRUE, + }; + SetNext(next, vertex_input_dynamic); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state"); + } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; if (ext_shader_atomic_int64) { atomic_int64 = { @@ -732,6 +744,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_extended_dynamic_state{}; bool has_ext_shader_atomic_int64{}; bool has_ext_provoking_vertex{}; + bool has_ext_vertex_input_dynamic_state{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -763,6 +776,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); + test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME, + false); test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); @@ -827,6 +842,19 @@ std::vector Device::LoadExtensions(bool requires_surface) { ext_provoking_vertex = true; } } + if (has_ext_vertex_input_dynamic_state) { + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input; + vertex_input.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT; + vertex_input.pNext = nullptr; + features.pNext = &vertex_input; + physical.GetFeatures2KHR(features); + + if (vertex_input.vertexInputDynamicState) { + extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + ext_vertex_input_dynamic_state = true; + } + } if (has_ext_shader_atomic_int64) { VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; -- cgit v1.2.3 From 373f75d944473731408d7a72c967d5c4b37af5bb Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:22:31 -0400 Subject: shader: Add shader loop safety check settings Also add a setting for enable Nsight Aftermath. --- src/common/settings.h | 3 + .../backend/glasm/emit_glasm_instructions.h | 2 + .../backend/glasm/emit_glasm_not_implemented.cpp | 8 +++ .../backend/spirv/emit_spirv_context_get_set.cpp | 24 +++++--- .../backend/spirv/emit_spirv_instructions.h | 2 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 14 ++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 3 + src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../frontend/maxwell/structured_control_flow.cpp | 42 ++++++++++++-- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 66 +++++++++++++++------- src/video_core/vulkan_common/vulkan_device.cpp | 4 +- src/yuzu/configuration/config.cpp | 4 ++ src/yuzu/configuration/configure_debug.cpp | 8 +++ src/yuzu/configuration/configure_debug.ui | 26 +++++++++ src/yuzu_cmd/config.cpp | 2 + src/yuzu_cmd/default_ini.h | 8 +++ 16 files changed, 183 insertions(+), 35 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/common/settings.h b/src/common/settings.h index ce1bc647d..ac0590690 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -308,6 +308,9 @@ struct Values { // Renderer Setting renderer_backend{RendererBackend::OpenGL, "backend"}; BasicSetting renderer_debug{false, "debug"}; + BasicSetting enable_nsight_aftermath{false, "nsight_aftermath"}; + BasicSetting disable_shader_loop_safety_checks{false, + "disable_shader_loop_safety_checks"}; Setting vulkan_device{0, "vulkan_device"}; Setting resolution_factor{1, "resolution_factor"}; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index c9f4826ce..fef9ff9be 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -42,6 +42,8 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitSetLoopSafetyVariable(EmitContext& ctx); +void EmitGetLoopSafetyVariable(EmitContext& ctx); void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 95bcbd750..60735fe31 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -153,6 +153,14 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } +void EmitSetLoopSafetyVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetLoopSafetyVariable(EmitContext& ctx) { + NotImplemented(); +} + void EmitGetZFlag(EmitContext& ctx) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 442a958a5..42fff74e3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -163,35 +163,43 @@ Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 inde } // Anonymous namespace void EmitGetRegister(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetRegister(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetPred(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetPred(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetGotoVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetGotoVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetIndirectBranchVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetIndirectBranchVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); +} + +void EmitSetLoopSafetyVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetLoopSafetyVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); } Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 1181e7b4f..e3e5b03fe 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -43,6 +43,8 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitSetLoopSafetyVariable(EmitContext& ctx); +void EmitGetLoopSafetyVariable(EmitContext& ctx); Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index e9fd41237..6c37af5e7 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -125,6 +125,12 @@ U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { } } +void IREmitter::SetPred(IR::Pred pred, const U1& value) { + if (pred != IR::Pred::PT) { + Inst(Opcode::SetPred, pred, value); + } +} + U1 IREmitter::GetGotoVariable(u32 id) { return Inst(Opcode::GetGotoVariable, id); } @@ -141,8 +147,12 @@ void IREmitter::SetIndirectBranchVariable(const U32& value) { Inst(Opcode::SetIndirectBranchVariable, value); } -void IREmitter::SetPred(IR::Pred pred, const U1& value) { - Inst(Opcode::SetPred, pred, value); +U32 IREmitter::GetLoopSafetyVariable(u32 id) { + return Inst(Opcode::GetLoopSafetyVariable, id); +} + +void IREmitter::SetLoopSafetyVariable(u32 id, const U32& counter) { + Inst(Opcode::SetLoopSafetyVariable, id, counter); } U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index bb3500c54..7caab1f61 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -55,6 +55,9 @@ public: [[nodiscard]] U32 GetIndirectBranchVariable(); void SetIndirectBranchVariable(const U32& value); + [[nodiscard]] U32 GetLoopSafetyVariable(u32 id); + void SetLoopSafetyVariable(u32 id, const U32& counter); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8a8d0d759..e87aeddd5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -32,6 +32,8 @@ OPCODE(GetGotoVariable, U1, U32, OPCODE(SetGotoVariable, Void, U32, U1, ) OPCODE(GetIndirectBranchVariable, U32, ) OPCODE(SetIndirectBranchVariable, Void, U32, ) +OPCODE(GetLoopSafetyVariable, U32, U32, ) +OPCODE(SetLoopSafetyVariable, Void, U32, U32, ) OPCODE(GetCbufU8, U32, U32, U32, ) OPCODE(GetCbufS8, U32, U32, U32, ) OPCODE(GetCbufU16, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c1e0646e6..b2b8c492a 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -9,11 +9,13 @@ #include #include #include +#include #include #include +#include "common/settings.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -739,8 +741,25 @@ private: } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; - if (current_block) { - current_block->AddBranch(loop_header_block); + const u32 this_loop_id{loop_id++}; + + if (Settings::values.disable_shader_loop_safety_checks) { + if (current_block) { + current_block->AddBranch(loop_header_block); + } + } else { + IR::Block* const init_block{block_pool.Create(inst_pool)}; + IR::IREmitter ir{*init_block}; + ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(0x2000)); + + if (current_block) { + current_block->AddBranch(init_block); + } + init_block->AddBranch(loop_header_block); + + auto& init_node{syntax_list.emplace_back()}; + init_node.type = IR::AbstractSyntaxNode::Type::Block; + init_node.data.block = init_block; } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; @@ -758,7 +777,16 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + if (!Settings::values.disable_shader_loop_safety_checks) { + const IR::U32 old_counter{ir.GetLoopSafetyVariable(this_loop_id)}; + const IR::U32 new_counter{ir.ISub(old_counter, ir.Imm32(1))}; + ir.SetLoopSafetyVariable(this_loop_id, new_counter); + + const IR::U1 safety_cond{ir.INotEqual(new_counter, ir.Imm32(0))}; + cond = ir.LogicalAnd(cond, safety_cond); + } + cond = ir.ConditionRef(cond); IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); @@ -863,8 +891,14 @@ private: ObjectPool& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; - // TODO: Make this constexpr when std::vector is constexpr + u32 loop_id{}; + +// TODO: C++20 Remove this when all compilers support constexpr std::vector +#if __cpp_lib_constexpr_vector >= 201907 + static constexpr Flow::Block dummy_flow_block; +#else const Flow::Block dummy_flow_block; +#endif }; } // Anonymous namespace diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index e54499ba5..a4ba393ef 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -48,73 +48,91 @@ struct GotoVariable : FlagTag { u32 index; }; +struct LoopSafetyVariable { + LoopSafetyVariable() = default; + explicit LoopSafetyVariable(u32 index_) : index{index_} {} + + auto operator<=>(const LoopSafetyVariable&) const noexcept = default; + + u32 index; +}; + struct IndirectBranchVariable { auto operator<=>(const IndirectBranchVariable&) const noexcept = default; }; -using Variant = std::variant; -using ValueMap = boost::container::flat_map>; +using Variant = + std::variant; +using ValueMap = boost::container::flat_map; struct DefTable { - const IR::Value& Def(IR::Block* block, IR::Reg variable) noexcept { + const IR::Value& Def(IR::Block* block, IR::Reg variable) { return block->SsaRegValue(variable); } - void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) { block->SetSsaRegValue(variable, value); } - const IR::Value& Def(IR::Block* block, IR::Pred variable) noexcept { + const IR::Value& Def(IR::Block* block, IR::Pred variable) { return preds[IR::PredIndex(variable)][block]; } - void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) { preds[IR::PredIndex(variable)].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, GotoVariable variable) noexcept { + const IR::Value& Def(IR::Block* block, GotoVariable variable) { return goto_vars[variable.index][block]; } - void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) { goto_vars[variable.index].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, IndirectBranchVariable) noexcept { + const IR::Value& Def(IR::Block* block, LoopSafetyVariable variable) { + return loop_safety_vars[variable.index][block]; + } + void SetDef(IR::Block* block, LoopSafetyVariable variable, const IR::Value& value) { + loop_safety_vars[variable.index].insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, IndirectBranchVariable) { return indirect_branch_var[block]; } - void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) { indirect_branch_var.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, ZeroFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, ZeroFlagTag) { return zero_flag[block]; } - void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) { zero_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, SignFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, SignFlagTag) { return sign_flag[block]; } - void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) { sign_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, CarryFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, CarryFlagTag) { return carry_flag[block]; } - void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) { carry_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, OverflowFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, OverflowFlagTag) { return overflow_flag[block]; } - void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) { overflow_flag.insert_or_assign(block, value); } std::array preds; boost::container::flat_map goto_vars; + boost::container::flat_map loop_safety_vars; ValueMap indirect_branch_var; ValueMap zero_flag; ValueMap sign_flag; @@ -134,6 +152,10 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { return IR::Opcode::UndefU1; } +IR::Opcode UndefOpcode(const LoopSafetyVariable&) noexcept { + return IR::Opcode::UndefU32; +} + IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { return IR::Opcode::UndefU32; } @@ -315,6 +337,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetGotoVariable: pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; + case IR::Opcode::SetLoopSafetyVariable: + pass.WriteVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block, inst.Arg(0)); + break; case IR::Opcode::SetIndirectBranchVariable: pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); break; @@ -343,6 +368,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetGotoVariable: inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); break; + case IR::Opcode::GetLoopSafetyVariable: + inst.ReplaceUsesWith(pass.ReadVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block)); + break; case IR::Opcode::GetIndirectBranchVariable: inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); break; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8eb37a77a..bf063c047 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -467,7 +467,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; - if (nv_device_diagnostics_config) { + if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { nsight_aftermath_tracker = std::make_unique(); diagnostics_nv = { @@ -781,7 +781,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); - if (Settings::values.renderer_debug) { + if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index a5e032959..dc69574a9 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -824,6 +824,8 @@ void Config::ReadRendererValues() { if (global) { ReadBasicSetting(Settings::values.renderer_debug); + ReadBasicSetting(Settings::values.enable_nsight_aftermath); + ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); @@ -1353,6 +1355,8 @@ void Config::SaveRendererValues() { if (global) { WriteBasicSetting(Settings::values.renderer_debug); + WriteBasicSetting(Settings::values.enable_nsight_aftermath); + WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 8fceb3878..f7e29dbd7 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -45,8 +45,13 @@ void ConfigureDebug::SetConfiguration() { ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); ui->enable_cpu_debugging->setEnabled(runtime_lock); ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); + ui->enable_nsight_aftermath->setEnabled(runtime_lock); + ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue()); ui->disable_macro_jit->setEnabled(runtime_lock); ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); + ui->disable_loop_safety_checks->setEnabled(runtime_lock); + ui->disable_loop_safety_checks->setChecked( + Settings::values.disable_shader_loop_safety_checks.GetValue()); ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue()); } @@ -61,6 +66,9 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.use_auto_stub = ui->use_auto_stub->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); + Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); + Settings::values.disable_shader_loop_safety_checks = + ui->disable_loop_safety_checks->isChecked(); Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); Settings::values.extended_logging = ui->extended_logging->isChecked(); Debugger::ToggleConsole(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index 1260ad6f0..c8baf2921 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -125,6 +125,16 @@ + + + + When checked, it enables Nsight Aftermath crash dumps + + + Enable Nsight Aftermath + + + @@ -138,6 +148,16 @@ + + + + When checked, it executes shaders without loop logic changes + + + Disable Loop safety checks + + + @@ -252,11 +272,17 @@ log_filter_edit toggle_console + extended_logging open_log_button homebrew_args_edit enable_graphics_debugging + enable_nsight_aftermath + disable_macro_jit + disable_loop_safety_checks reporting_services quest_flag + use_debug_asserts + use_auto_stub diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 3e22fee37..763df6dd6 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -444,6 +444,8 @@ void Config::ReadValues() { // Renderer ReadSetting("Renderer", Settings::values.renderer_backend); ReadSetting("Renderer", Settings::values.renderer_debug); + ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); + ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); ReadSetting("Renderer", Settings::values.vulkan_device); ReadSetting("Renderer", Settings::values.fullscreen_mode); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 88d33ecab..a6ca7b6cd 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -221,6 +221,14 @@ backend = # 0 (default): Disabled, 1: Enabled debug = +# Enable Nsight Aftermath crash dumps +# 0 (default): Disabled, 1: Enabled +nsight_aftermath = + +# Disable shader loop safety checks, executing the shader without loop logic changes +# 0 (default): Disabled, 1: Enabled +disable_shader_loop_safety_checks = + # Which Vulkan physical device to use (defaults to 0) vulkan_device = -- cgit v1.2.3 From 69f9b97e7ed1e873657105cff27ed9095ee277ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 22:48:55 -0300 Subject: vulkan_device: Blacklist VK_EXT_vertex_input_dynamic_state on Intel --- src/video_core/vulkan_common/vulkan_device.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index bf063c047..9754abcf8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -491,6 +491,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); ext_extended_dynamic_state = false; } + if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); + ext_vertex_input_dynamic_state = false; + } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); -- cgit v1.2.3 From cbbca26d182991abf68d9b2e1b1e5935bf4eb476 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:03:08 -0300 Subject: shader: Add support for native 16-bit floats --- src/shader_recompiler/CMakeLists.txt | 1 + .../frontend/maxwell/translate_program.cpp | 8 +++++--- .../frontend/maxwell/translate_program.h | 3 ++- src/shader_recompiler/host_translate_info.h | 18 ++++++++++++++++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 12 ++++++++---- src/video_core/renderer_opengl/gl_shader_cache.h | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 ++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 3 +++ src/video_core/vulkan_common/vulkan_device.cpp | 4 ++-- 9 files changed, 50 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/host_translate_info.h (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index f801a9f72..164e94071 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -211,6 +211,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/translate.h frontend/maxwell/translate_program.cpp frontend/maxwell/translate_program.h + host_translate_info.h ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e52170e3e..5250509c1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -13,6 +13,7 @@ #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { @@ -120,7 +121,7 @@ void AddNVNStorageBuffers(IR::Program& program) { } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { IR::Program program; program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); program.blocks = GenerateBlocks(program.syntax_list); @@ -150,8 +151,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); + Flow::CFG& cfg, const HostTranslateInfo& host_info); [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, Environment& env_vertex_b); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h new file mode 100644 index 000000000..94a584219 --- /dev/null +++ b/src/shader_recompiler/host_translate_info.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +// Try to keep entries here to a minimum +// They can accidentally change the cached information in a shader + +/// Misc information about the host +struct HostTranslateInfo { + bool support_float16{}; ///< True when the device supports 16-bit floats + bool support_int64{}; ///< True when the device supports 64-bit integers +}; + +} // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c05cd5d28..b459397f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -201,6 +201,10 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, + }, + host_info{ + .support_float16 = false, + .support_int64 = true, } { if (use_asynchronous_shaders) { workers = CreateWorkers(); @@ -373,15 +377,15 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); for (const auto& desc : programs[index].info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; for (const auto& desc : program_vb.info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } @@ -449,7 +453,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline( LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; u32 num_storage_buffers{}; for (const auto& desc : program.info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index d24b54d90..6952a1f2c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" @@ -82,6 +83,8 @@ private: std::unordered_map> compute_cache; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path shader_cache_filename; std::unique_ptr workers; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0b6fe8e2e..72e6f4207 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -307,6 +307,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .has_broken_signed_operations = false, .ignore_nan_fp_comparisons = false, }; + host_info = Shader::HostTranslateInfo{ + .support_float16 = device.IsFloat16Supported(), + .support_int64 = true, + }; } PipelineCache::~PipelineCache() = default; @@ -484,11 +488,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } @@ -575,7 +579,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 167a2ee2e..42da2960b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,6 +19,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" @@ -157,6 +158,8 @@ private: ShaderPools main_pools; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9754abcf8..0d8c6cd08 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -497,8 +497,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - // is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); + is_float16_supported = false; } graphics_queue = logical.GetQueue(graphics_family); -- cgit v1.2.3 From ca67077ca87772b4b4ac61d08f5b2c60616348e0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 21:14:57 -0300 Subject: vk_graphics_pipeline: Use VK_KHR_push_descriptor when available ~51% faster on Nvidia compared to previous method. --- src/video_core/renderer_vulkan/pipeline_helper.h | 32 ++++++++++++++++------ .../renderer_vulkan/vk_compute_pipeline.cpp | 8 ++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 28 ++++++++++++------- .../renderer_vulkan/vk_graphics_pipeline.h | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 11 ++++++++ src/video_core/vulkan_common/vulkan_device.h | 12 ++++++++ src/video_core/vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 31 +++++++++++++-------- 8 files changed, 88 insertions(+), 36 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index c6e5e059b..4847db6b6 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -16,38 +16,50 @@ #include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { class DescriptorLayoutBuilder { public: - DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} + DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} - vk::DescriptorSetLayout CreateDescriptorSetLayout() const { + bool CanUsePushDescriptor() const noexcept { + return device->IsKhrPushDescriptorSupported() && + num_descriptors <= device->MaxPushDescriptors(); + } + + vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const { if (bindings.empty()) { return nullptr; } - return device->CreateDescriptorSetLayout({ + const VkDescriptorSetLayoutCreateFlags flags = + use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; + return device->GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, - .flags = 0, + .flags = flags, .bindingCount = static_cast(bindings.size()), .pBindings = bindings.data(), }); } vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, - VkPipelineLayout pipeline_layout) const { + VkPipelineLayout pipeline_layout, + bool use_push_descriptor) const { if (entries.empty()) { return nullptr; } - return device->CreateDescriptorUpdateTemplateKHR({ + const VkDescriptorUpdateTemplateType type = + use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR + : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + return device->GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, .pNext = nullptr, .flags = 0, .descriptorUpdateEntryCount = static_cast(entries.size()), .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .templateType = type, .descriptorSetLayout = descriptor_set_layout, .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .pipelineLayout = pipeline_layout, @@ -56,7 +68,7 @@ public: } vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { - return device->CreatePipelineLayout({ + return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -97,14 +109,16 @@ private: .stride = sizeof(DescriptorUpdateEntry), }); ++binding; + num_descriptors += descriptors[i].count; offset += sizeof(DescriptorUpdateEntry); } } - const vk::Device* device{}; + const Device* device{}; boost::container::small_vector bindings; boost::container::small_vector entries; u32 binding{}; + u32 num_descriptors{}; size_t offset{}; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index cc855a62e..70b84c7a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -37,15 +37,14 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript uniform_buffer_sizes.begin()); auto func{[this, &descriptor_pool, shader_notify] { - DescriptorLayoutBuilder builder{device.GetLogical()}; + DescriptorLayoutBuilder builder{device}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - descriptor_set_layout = builder.CreateDescriptorSetLayout(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(false); pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); descriptor_update_template = - builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); + builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, @@ -186,7 +185,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - if (!descriptor_set_layout) { return; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e02b1b7ab..2b59a9d88 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -40,7 +40,7 @@ constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; constexpr size_t MAX_IMAGE_ELEMENTS = 64; DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { - DescriptorLayoutBuilder builder{device.GetLogical()}; + DescriptorLayoutBuilder builder{device}; for (size_t index = 0; index < infos.size(); ++index) { static constexpr std::array stages{ VK_SHADER_STAGE_VERTEX_BIT, @@ -229,12 +229,15 @@ GraphicsPipeline::GraphicsPipeline( } auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); - + uses_push_descriptor = builder.CanUsePushDescriptor(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); + if (!uses_push_descriptor) { + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + } const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; pipeline_layout = builder.CreatePipelineLayout(set_layout); - descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); + descriptor_update_template = + builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); @@ -462,11 +465,16 @@ void GraphicsPipeline::ConfigureDraw() { if (!descriptor_set_layout) { return; } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - const vk::Device& dev{device.GetLogical()}; - dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, - descriptor_set, nullptr); + if (uses_push_descriptor) { + cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout, + 0, descriptor_data); + } else { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); + } }); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 40d1edabd..622267147 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -148,6 +148,7 @@ private: std::condition_variable build_condvar; std::mutex build_mutex; std::atomic_bool is_built{false}; + bool uses_push_descriptor{false}; }; } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0d8c6cd08..9d918de8d 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -767,6 +767,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); + test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); @@ -932,6 +933,16 @@ std::vector Device::LoadExtensions(bool requires_surface) { khr_workgroup_memory_explicit_layout = true; } } + if (khr_push_descriptor) { + VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; + push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; + push_descriptor.pNext = nullptr; + + physical_properties.pNext = &push_descriptor; + physical.GetProperties2KHR(physical_properties); + + max_push_descriptors = push_descriptor.maxPushDescriptors; + } return extensions; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4fda472b0..49605752d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -154,6 +154,11 @@ public: return guest_warp_stages & stage; } + /// Returns the maximum number of push descriptors. + u32 MaxPushDescriptors() const { + return max_push_descriptors; + } + /// Returns true if formatless image load is supported. bool IsFormatlessImageLoadSupported() const { return is_formatless_image_load_supported; @@ -194,6 +199,11 @@ public: return khr_spirv_1_4; } + /// Returns true if the device supports VK_KHR_push_descriptor. + bool IsKhrPushDescriptorSupported() const { + return khr_push_descriptor; + } + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { return khr_workgroup_memory_explicit_layout; @@ -330,6 +340,7 @@ private: VkDriverIdKHR driver_id{}; ///< Driver ID. VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. u64 device_access_memory{}; ///< Total size of device local memory in bytes. + u32 max_push_descriptors{}; ///< Maximum number of push descriptors bool is_optimal_astc_supported{}; ///< Support for native ASTC. bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. @@ -345,6 +356,7 @@ private: bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. + bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 7e13ae8af..d7e9fac22 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdFillBuffer); X(vkCmdPipelineBarrier); X(vkCmdPushConstants); + X(vkCmdPushDescriptorSetWithTemplateKHR); X(vkCmdSetBlendConstants); X(vkCmdSetDepthBias); X(vkCmdSetDepthBounds); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 6e5be1186..d43b606f1 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; PFN_vkBindBufferMemory vkBindBufferMemory{}; PFN_vkBindImageMemory vkBindImageMemory{}; + PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBeginQuery vkCmdBeginQuery{}; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; - PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{}; PFN_vkCmdBindPipeline vkCmdBindPipeline{}; PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; + PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; PFN_vkCmdBlitImage vkCmdBlitImage{}; PFN_vkCmdClearAttachments vkCmdClearAttachments{}; PFN_vkCmdCopyBuffer vkCmdCopyBuffer{}; @@ -211,35 +212,35 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdDispatch vkCmdDispatch{}; PFN_vkCmdDraw vkCmdDraw{}; PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; + PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdEndQuery vkCmdEndQuery{}; PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{}; - PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdFillBuffer vkCmdFillBuffer{}; PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; PFN_vkCmdPushConstants vkCmdPushConstants{}; + PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{}; + PFN_vkCmdResolveImage vkCmdResolveImage{}; PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; + PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBias vkCmdSetDepthBias{}; PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{}; - PFN_vkCmdSetEvent vkCmdSetEvent{}; - PFN_vkCmdSetScissor vkCmdSetScissor{}; - PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; - PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; - PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; - PFN_vkCmdSetViewport vkCmdSetViewport{}; - PFN_vkCmdWaitEvents vkCmdWaitEvents{}; - PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; - PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{}; PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{}; PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{}; PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; + PFN_vkCmdSetEvent vkCmdSetEvent{}; PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; + PFN_vkCmdSetScissor vkCmdSetScissor{}; + PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; + PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; + PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; - PFN_vkCmdResolveImage vkCmdResolveImage{}; + PFN_vkCmdSetViewport vkCmdSetViewport{}; + PFN_vkCmdWaitEvents vkCmdWaitEvents{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{}; PFN_vkCreateCommandPool vkCreateCommandPool{}; @@ -990,6 +991,12 @@ public: dynamic_offsets.size(), dynamic_offsets.data()); } + void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template, + VkPipelineLayout layout, u32 set, + const void* data) const noexcept { + dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data); + } + void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { dld->vkCmdBindPipeline(handle, bind_point, pipeline); } -- cgit v1.2.3 From fb166b5ff4b42279b2c63c69f5b5a35feafa259e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:39:21 -0300 Subject: shader: Emulate 64-bit integers when not supported Useful for mobile and Intel Xe devices. --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 3 +++ src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 5 +++++ 6 files changed, 16 insertions(+), 2 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e728b43cc..c084f3400 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -154,6 +154,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Wed, 23 Jun 2021 03:32:41 -0300 Subject: vk_graphics_pipeline: Implement conservative rendering --- src/video_core/engines/maxwell_3d.h | 7 ++++- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 33 ++++++++++++++++------ src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++ src/video_core/vulkan_common/vulkan_device.h | 6 ++++ 6 files changed, 44 insertions(+), 10 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index fc2c36c6b..da2ded671 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -910,7 +910,11 @@ public: u32 fill_rectangle; - INSERT_PADDING_WORDS_NOINIT(0x8); + INSERT_PADDING_WORDS_NOINIT(0x2); + + u32 conservative_raster_enable; + + INSERT_PADDING_WORDS_NOINIT(0x5); std::array vertex_attrib_format; @@ -1615,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(render_area, 0x3FD); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); +ASSERT_REG_POSITION(conservative_raster_enable, 0x452); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); ASSERT_REG_POSITION(multisample_sample_locations, 0x478); ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 16cef8711..7563dc462 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -87,6 +87,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, depth_format.Assign(static_cast(regs.zeta.format)); y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); + conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 04f34eb97..66b57b636 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -194,6 +194,7 @@ struct FixedPipelineState { BitField<6, 5, u32> depth_format; BitField<11, 1, u32> y_negate; BitField<12, 1, u32> provoking_vertex_last; + BitField<13, 1, u32> conservative_raster_enable; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9eb353a88..70e183e65 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -599,16 +599,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pScissors = nullptr, }; - const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .provokingVertexMode = key.state.provoking_vertex_last != 0 - ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT - : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, - }; - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, + .pNext = nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), @@ -625,6 +618,28 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .conservativeRasterizationMode = key.state.conservative_raster_enable != 0 + ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT + : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, + .extraPrimitiveOverestimationSize = 0.0f, + }; + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; + if (device.IsExtConservativeRasterizationSupported()) { + conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); + } + if (device.IsExtProvokingVertexSupported()) { + provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex); + } const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9d918de8d..7b184d2f8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -412,6 +412,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + if (!ext_conservative_rasterization) { + LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); + } + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; if (ext_provoking_vertex) { provoking_vertex = { @@ -776,6 +780,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { true); test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); + test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, + true); test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 40d00a52f..a9c0a0e4d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -264,6 +264,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_conservative_rasterization. + bool IsExtConservativeRasterizationSupported() const { + return ext_conservative_rasterization; + } + /// Returns true if the device supports VK_EXT_provoking_vertex. bool IsExtProvokingVertexSupported() const { return ext_provoking_vertex; @@ -374,6 +379,7 @@ private: bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached -- cgit v1.2.3 From 7dafa96ab59892b7f1fbffdb61e4326e6443955f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 02:41:09 -0300 Subject: shader: Rework varyings and implement passthrough geometry shaders Put all varyings into a single std::bitset with helpers to access it. Implement passthrough geometry shaders using host's. --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/glasm/emit_context.cpp | 15 +- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 6 +- .../backend/glasm/emit_glasm_context_get_set.cpp | 6 +- .../backend/glsl/emit_context.cpp | 58 +++--- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 2 +- .../backend/glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_special.cpp | 4 +- .../backend/spirv/emit_context.cpp | 97 +++++----- src/shader_recompiler/backend/spirv/emit_context.h | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 19 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 2 +- src/shader_recompiler/environment.h | 5 + src/shader_recompiler/frontend/ir/attribute.h | 6 + src/shader_recompiler/frontend/ir/program.h | 1 + .../frontend/maxwell/translate_program.cpp | 18 +- .../ir_opt/collect_shader_info_pass.cpp | 202 +++++++-------------- src/shader_recompiler/profile.h | 1 + src/shader_recompiler/program_header.h | 62 +++---- src/shader_recompiler/runtime_info.h | 3 +- src/shader_recompiler/shader_info.h | 37 +--- src/shader_recompiler/varying_state.h | 69 +++++++ src/video_core/engines/maxwell_3d.h | 7 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 6 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 16 +- src/video_core/shader_environment.cpp | 10 +- src/video_core/vulkan_common/vulkan_device.cpp | 6 + src/video_core/vulkan_common/vulkan_device.h | 6 + 29 files changed, 345 insertions(+), 331 deletions(-) create mode 100644 src/shader_recompiler/varying_state.h (limited to 'src/video_core/vulkan_common') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 3b5708cb9..b5b7e5e83 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -229,6 +229,7 @@ add_library(shader_recompiler STATIC program_header.h runtime_info.h shader_info.h + varying_state.h ) target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 21e14867c..80dad9ff3 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -83,14 +83,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (generic.used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.loads.Generic(index)) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", - InterpDecorator(generic.interpolation), index, attr_stage, index, index); + InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); } } - if (IsInputArray(stage) && info.loads_position) { + if (IsInputArray(stage) && info.loads.AnyComponent(IR::Attribute::PositionX)) { Add("ATTRIB vertex_position=vertex.position;"); } if (info.uses_invocation_id) { @@ -102,7 +101,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (info.stores_tess_level_inner) { Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { @@ -124,8 +123,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 79314f130..2b96977b3 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -296,8 +296,10 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (info.uses_sparse_residency) { header += "OPTION EXT_sparse_texture2;"; } - if (((info.stores_viewport_index || info.stores_layer) && stage != Stage::Geometry) || - info.stores_viewport_mask) { + const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || + info.stores[IR::Attribute::Layer]}; + if ((stage != Stage::Geometry && stores_viewport_layer) || + info.stores[IR::Attribute::ViewportMask]) { if (profile.support_viewport_index_layer_non_geometry) { header += "OPTION NV_viewport_array2;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index bc195d248..02c9dc6d7 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -261,7 +261,7 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, fmt::format("{}.z", value), fmt::format("{}.w", value)}; read(compare_index, values); }}; - if (ctx.info.loads_position) { + if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { const u32 index{static_cast(IR::Attribute::PositionX)}; if (IsInputArray(ctx.stage)) { read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); @@ -269,8 +269,8 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); } } - for (u32 index = 0; index < ctx.info.input_generics.size(); ++index) { - if (!ctx.info.input_generics[index].used) { + for (u32 index = 0; index < static_cast(IR::NUM_GENERICS); ++index) { + if (!ctx.info.loads.Generic(index)) { continue; } read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 14c009535..0d7f7bc3b 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -212,22 +212,22 @@ std::string_view OutputPrimitive(OutputTopology topology) { } void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { - if (!ctx.info.stores_legacy_varyings) { + if (!ctx.info.stores.Legacy()) { return; } - if (ctx.info.stores_fixed_fnc_textures) { + if (ctx.info.stores.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.stores_color_front_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_FrontColor;"; } - if (ctx.info.stores_color_front_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { header += "vec4 gl_FrontSecondaryColor;"; } - if (ctx.info.stores_color_back_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { header += "vec4 gl_BackColor;"; } - if (ctx.info.stores_color_back_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { header += "vec4 gl_BackSecondaryColor;"; } } @@ -237,32 +237,32 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { return; } header += "out gl_PerVertex{vec4 gl_Position;"; - if (ctx.info.stores_point_size) { + if (ctx.info.stores[IR::Attribute::PointSize]) { header += "float gl_PointSize;"; } - if (ctx.info.stores_clip_distance) { + if (ctx.info.stores.ClipDistances()) { header += "float gl_ClipDistance[];"; } - if (ctx.info.stores_viewport_index && ctx.profile.support_viewport_index_layer_non_geometry && - ctx.stage != Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && + ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } SetupLegacyOutPerVertex(ctx, header); header += "};"; - if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { - if (!ctx.info.loads_legacy_varyings) { + if (!ctx.info.loads.Legacy()) { return; } header += "in gl_PerFragment{"; - if (ctx.info.loads_fixed_fnc_textures) { + if (ctx.info.loads.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.loads_color_front_diffuse) { + if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_Color;"; } header += "};"; @@ -325,14 +325,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile SetupOutPerVertex(*this, header); SetupLegacyInPerFragment(*this, header); - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (!generic.used || !runtime_info.previous_stage_stores_generic[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { continue; } - header += - fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, - InterpDecorator(generic.interpolation), index, InputArrayDecorator(stage)); + header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, + InterpDecorator(info.interpolation[index]), index, + InputArrayDecorator(stage)); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -349,11 +348,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (!info.stores_generics[index]) { - continue; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(index, program.invocations); } - DefineGenericOutput(index, program.invocations); } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); @@ -398,14 +396,14 @@ void EmitContext::SetupExtensions() { header += "#extension GL_NV_shader_thread_shuffle : enable\n"; } } - if ((info.stores_viewport_index || info.stores_layer) && + if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { header += "#extension GL_ARB_sparse_texture2 : enable\n"; } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { header += "#extension GL_NV_viewport_array2 : enable\n"; } if (info.uses_typeless_image_reads) { @@ -535,20 +533,20 @@ void EmitContext::DefineHelperFunctions() { fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " "masked_index=uint(base_index)&3u;switch(base_index>>2){{", vertex_arg)}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { const auto position_idx{is_array ? "gl_in[vertex]." : ""}; func += fmt::format("case {}:return {}{}[masked_index];", static_cast(IR::Attribute::PositionX) >> 2, position_idx, position_name); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } const auto vertex_idx{is_array ? "[vertex]" : ""}; func += fmt::format("case {}:return in_attr{}{}[masked_index];", - base_attribute_value + i, i, vertex_idx); + base_attribute_value + index, index, vertex_idx); } func += "default: return 0.0;}}"; header += func; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 32c4f1da2..8deaf5760 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -171,7 +171,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } std::string GlslVersionSpecifier(const EmitContext& ctx) { - if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings || ctx.info.loads_legacy_varyings) { + if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) { return " compatibility"; } return ""; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 3d2ba2eee..16e2a8502 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -179,7 +179,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - if (!ctx.runtime_info.previous_stage_stores_generic[index]) { + if (!ctx.runtime_info.previous_stage_stores.Generic(index)) { ctx.AddF32("{}=0.f;", inst, attr); return; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 6420aaa21..298881c7b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -20,8 +20,8 @@ void InitializeOutputVaryings(EmitContext& ctx) { if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { ctx.Add("gl_Position=vec4(0,0,0,1);"); } - for (size_t index = 0; index < ctx.info.stores_generics.size(); ++index) { - if (!ctx.info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!ctx.info.stores.Generic(index)) { continue; } const auto& info_array{ctx.output_generics.at(index)}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4c6501129..af4fb0c69 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -557,7 +557,7 @@ void EmitContext::DefineCommonConstants() { } void EmitContext::DefineInterfaces(const IR::Program& program) { - DefineInputs(program.info); + DefineInputs(program); DefineOutputs(program); } @@ -693,16 +693,16 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < static_cast(IR::NUM_GENERICS); ++index) { + if (!info.loads.Generic(index)) { continue; } - literals.push_back(base_attribute_value + i); + literals.push_back(base_attribute_value + index); labels.push_back(OpLabel()); } OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); @@ -710,7 +710,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturnValue(Const(0.0f)); size_t label_index{0}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{is_array ? OpAccessChain(input_f32, input_position, vertex, masked_index) @@ -719,18 +719,18 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpReturnValue(result); ++label_index; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - if (!info.input_generics[i].used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } AddLabel(labels[label_index]); - const auto type{AttrTypes(*this, static_cast(i))}; + const auto type{AttrTypes(*this, static_cast(index))}; if (!type) { OpReturnValue(Const(0.0f)); ++label_index; continue; } - const Id generic_id{input_generics.at(i)}; + const Id generic_id{input_generics.at(index)}; const Id pointer{is_array ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) : OpAccessChain(type->pointer, generic_id, masked_index)}; @@ -758,19 +758,19 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - literals.push_back(base_attribute_value + static_cast(i)); + literals.push_back(base_attribute_value + static_cast(index)); labels.push_back(OpLabel()); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { literals.push_back(static_cast(IR::Attribute::ClipDistance0) >> 2); labels.push_back(OpLabel()); literals.push_back(static_cast(IR::Attribute::ClipDistance4) >> 2); @@ -781,28 +781,28 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturn(); size_t label_index{0}; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - for (size_t i = 0; i < info.stores_generics.size(); ++i) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - if (output_generics[i][0].num_components != 4) { + if (output_generics[index][0].num_components != 4) { throw NotImplementedException("Physical stores and transform feedbacks"); } AddLabel(labels[label_index]); - const Id generic_id{output_generics[i][0].id}; + const Id generic_id{output_generics[index][0].id}; const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; OpStore(pointer, store_value); @@ -1146,7 +1146,10 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { } } -void EmitContext::DefineInputs(const Info& info) { +void EmitContext::DefineInputs(const IR::Program& program) { + const Info& info{program.info}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); } @@ -1183,15 +1186,20 @@ void EmitContext::DefineInputs(const Info& info) { fswzadd_lut_b = ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); } - if (info.loads_primitive_id) { + if (loads[IR::Attribute::PrimitiveId]) { primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); } - if (info.loads_position) { + if (loads.AnyComponent(IR::Attribute::PositionX)) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; input_position = DefineInput(*this, F32[4], true, built_in); + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } + } } - if (info.loads_instance_id) { + if (loads[IR::Attribute::InstanceId]) { if (profile.support_vertex_instance_id) { instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); } else { @@ -1199,7 +1207,7 @@ void EmitContext::DefineInputs(const Info& info) { base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } } - if (info.loads_vertex_id) { + if (loads[IR::Attribute::VertexId]) { if (profile.support_vertex_instance_id) { vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); } else { @@ -1207,24 +1215,24 @@ void EmitContext::DefineInputs(const Info& info) { base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } } - if (info.loads_front_face) { + if (loads[IR::Attribute::FrontFace]) { front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); } - if (info.loads_point_coord) { + if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) { point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); } - if (info.loads_tess_coord) { + if (loads[IR::Attribute::TessellationEvaluationPointU] || + loads[IR::Attribute::TessellationEvaluationPointV]) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } - for (size_t index = 0; index < info.input_generics.size(); ++index) { - if (!runtime_info.previous_stage_stores_generic[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const AttributeType input_type{runtime_info.generic_input_types[index]}; + if (!runtime_info.previous_stage_stores.Generic(index)) { continue; } - const InputVarying generic{info.input_generics[index]}; - if (!generic.used) { + if (!loads.Generic(index)) { continue; } - const AttributeType input_type{runtime_info.generic_input_types[index]}; if (input_type == AttributeType::Disabled) { continue; } @@ -1234,10 +1242,13 @@ void EmitContext::DefineInputs(const Info& info) { Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; + if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { + Decorate(id, spv::Decoration::PassthroughNV); + } if (stage != Stage::Fragment) { continue; } - switch (generic.interpolation) { + switch (info.interpolation[index]) { case Interpolation::Smooth: // Default // Decorate(id, spv::Decoration::Smooth); @@ -1266,42 +1277,42 @@ void EmitContext::DefineInputs(const Info& info) { void EmitContext::DefineOutputs(const IR::Program& program) { const Info& info{program.info}; const std::optional invocations{program.invocations}; - if (info.stores_position || stage == Stage::VertexB) { + if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } - if (info.stores_point_size || runtime_info.fixed_state_point_size) { + if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } const Id type{TypeArray(F32[1], Const(8U))}; clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } - if (info.stores_layer && + if (info.stores[IR::Attribute::Layer] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing Layer in fragment stage"); } layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); } - if (info.stores_viewport_index && + if (info.stores[IR::Attribute::ViewportIndex] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ViewportIndex in fragment stage"); } viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, spv::BuiltIn::ViewportMaskNV); } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { DefineGenericOutput(*this, index, invocations); } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 527685fb8..e277bc358 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -300,7 +300,7 @@ private: void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); - void DefineInputs(const Info& info); + void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 278c262f8..ddb86d070 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -281,11 +281,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); break; } - if (program.info.stores_point_size) { + if (program.info.stores[IR::Attribute::PointSize]) { ctx.AddCapability(spv::Capability::GeometryPointSize); } ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); + if (program.is_geometry_passthrough) { + if (ctx.profile.support_geometry_shader_passthrough) { + ctx.AddExtension("SPV_NV_geometry_shader_passthrough"); + ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV); + } else { + LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used with no support"); + } + } break; case Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; @@ -377,20 +385,21 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } - if (info.stores_viewport_index) { + if (info.stores[IR::Attribute::ViewportIndex]) { ctx.AddCapability(spv::Capability::MultiViewport); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { ctx.AddExtension("SPV_NV_viewport_array2"); ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); } - if (info.stores_layer || info.stores_viewport_index) { + if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) { if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); } } - if (!profile.support_vertex_instance_id && (info.loads_instance_id || info.loads_vertex_id)) { + if (!profile.support_vertex_instance_id && + (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 85bd72389..77fbb2b2f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -298,7 +298,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; - if (!type || !ctx.runtime_info.previous_stage_stores_generic[index]) { + if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index)) { // Attribute is disabled return ctx.Const(0.0f); } diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 090bc1c08..8369d0d84 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -31,6 +31,10 @@ public: return sph; } + [[nodiscard]] const std::array& GpPassthroughMask() const noexcept { + return gp_passthrough_mask; + } + [[nodiscard]] Stage ShaderStage() const noexcept { return stage; } @@ -41,6 +45,7 @@ public: protected: ProgramHeader sph{}; + std::array gp_passthrough_mask{}; Stage stage{}; u32 start_address{}; }; diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 8bf2ddf30..ca1199494 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -222,6 +222,8 @@ enum class Attribute : u64 { FrontFace = 255, }; +constexpr size_t NUM_GENERICS = 32; + [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); @@ -230,6 +232,10 @@ enum class Attribute : u64 { [[nodiscard]] std::string NameOf(Attribute attribute); +[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept { + return static_cast(static_cast(attribute) + value); +} + } // namespace Shader::IR template <> diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 9ede5b48d..ebcaa8bc2 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -27,6 +27,7 @@ struct Program { u32 invocations{}; u32 local_memory_size{}; u32 shared_memory_size{}; + bool is_geometry_passthrough{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index a8b727f1a..6b4b0ce5b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -46,7 +46,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { return; } const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { std::optional imap; for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { if (value == PixelImap::Unused) { @@ -60,7 +60,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { if (!imap) { continue; } - program.info.input_generics[index].interpolation = [&] { + program.info.interpolation[index] = [&] { switch (*imap) { case PixelImap::Unused: case PixelImap::Perspective: @@ -140,6 +140,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + } break; } case Stage::Compute: @@ -194,12 +199,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - if (vertex_b.info.stores_generics[index]) { - result.info.stores_generics[index] = true; - } - } + result.info.loads.mask |= vertex_b.info.loads.mask; + result.info.stores.mask |= vertex_b.info.stores.mask; + Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DeadCodeEliminationPass(result); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a82472152..5e32ac784 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -29,130 +29,6 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { }); } -void GetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.input_generics.at(IR::GenericAttributeIndex(attr)).used = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.loads_fixed_fnc_textures = true; - info.loads_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::PrimitiveId: - info.loads_primitive_id = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.loads_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.loads_color_front_diffuse = true; - info.loads_legacy_varyings = true; - break; - case IR::Attribute::PointSpriteS: - case IR::Attribute::PointSpriteT: - info.loads_point_coord = true; - break; - case IR::Attribute::TessellationEvaluationPointU: - case IR::Attribute::TessellationEvaluationPointV: - info.loads_tess_coord = true; - break; - case IR::Attribute::InstanceId: - info.loads_instance_id = true; - break; - case IR::Attribute::VertexId: - info.loads_vertex_id = true; - break; - case IR::Attribute::FrontFace: - info.loads_front_face = true; - break; - default: - throw NotImplementedException("Get attribute {}", attr); - } -} - -void SetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.stores_generics[IR::GenericAttributeIndex(attr)] = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.stores_fixed_fnc_textures = true; - info.stores_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::Layer: - info.stores_layer = true; - break; - case IR::Attribute::ViewportIndex: - info.stores_viewport_index = true; - break; - case IR::Attribute::PointSize: - info.stores_point_size = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.stores_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.stores_color_front_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorFrontSpecularR: - case IR::Attribute::ColorFrontSpecularG: - case IR::Attribute::ColorFrontSpecularB: - case IR::Attribute::ColorFrontSpecularA: - info.stores_color_front_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackDiffuseR: - case IR::Attribute::ColorBackDiffuseG: - case IR::Attribute::ColorBackDiffuseB: - case IR::Attribute::ColorBackDiffuseA: - info.stores_color_back_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackSpecularR: - case IR::Attribute::ColorBackSpecularG: - case IR::Attribute::ColorBackSpecularB: - case IR::Attribute::ColorBackSpecularA: - info.stores_color_back_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ClipDistance0: - case IR::Attribute::ClipDistance1: - case IR::Attribute::ClipDistance2: - case IR::Attribute::ClipDistance3: - case IR::Attribute::ClipDistance4: - case IR::Attribute::ClipDistance5: - case IR::Attribute::ClipDistance6: - case IR::Attribute::ClipDistance7: - info.stores_clip_distance = true; - break; - case IR::Attribute::FogCoordinate: - info.stores_fog_coordinate = true; - break; - case IR::Attribute::ViewportMask: - info.stores_viewport_mask = true; - break; - default: - throw NotImplementedException("Set attribute {}", attr); - } -} - void GetPatch(Info& info, IR::Patch patch) { if (!IR::IsGeneric(patch)) { throw NotImplementedException("Reading non-generic patch {}", patch); @@ -511,10 +387,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.uses_demote_to_helper_invocation = true; break; case IR::Opcode::GetAttribute: - GetAttribute(info, inst.Arg(0).Attribute()); + info.loads.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::SetAttribute: - SetAttribute(info, inst.Arg(0).Attribute()); + info.stores.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::GetPatch: GetPatch(info, inst.Arg(0).Patch()); @@ -943,26 +819,78 @@ void GatherInfoFromHeader(Environment& env, Info& info) { if (!info.loads_indexed_attributes) { return; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.ps.IsGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const size_t offset{static_cast(IR::Attribute::Generic0X) + index * 4}; + const auto vector{header.ps.imap_generic_vector[index]}; + info.loads.mask[offset + 0] = vector.x != PixelImap::Unused; + info.loads.mask[offset + 1] = vector.y != PixelImap::Unused; + info.loads.mask[offset + 2] = vector.z != PixelImap::Unused; + info.loads.mask[offset + 3] = vector.w != PixelImap::Unused; } - info.loads_position |= header.ps.imap_systemb.position != 0; return; } if (info.loads_indexed_attributes) { - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.vtg.IsInputGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask = header.vtg.InputGeneric(index); + for (size_t i = 0; i < 4; ++i) { + info.loads.Set(attribute + i, mask[i]); + } + } + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.clip_distances}; + info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); } - info.loads_position |= header.vtg.imap_systemb.position != 0; + info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0); + info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0); + info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0); + info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0); + info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0); + info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0); + info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0); + info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0); + info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0); + info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0); + info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.tessellation_eval_point_u != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.tessellation_eval_point_v != 0); + info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0); + info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0); + // TODO: Legacy varyings } if (info.stores_indexed_attributes) { - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (header.vtg.IsOutputGenericVectorActive(i)) { - info.stores_generics[i] = true; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask{header.vtg.OutputGeneric(index)}; + for (size_t i = 0; i < 4; ++i) { + info.stores.Set(attribute + i, mask[i]); } } - info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; - info.stores_position |= header.vtg.omap_systemb.position != 0; + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.omap_systemc.clip_distances}; + info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.stores.Set(IR::Attribute::PrimitiveId, + header.vtg.omap_systemb.primitive_array_id != 0); + info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0); + info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0); + info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0); + info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0); + info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0); + info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0); + info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0); + info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0); + info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0); + info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.omap_systemc.tessellation_eval_point_u != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.omap_systemc.tessellation_eval_point_v != 0); + info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0); + info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0); + // TODO: Legacy varyings } } } // Anonymous namespace diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index d46be1638..ee1887b56 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -34,6 +34,7 @@ struct Profile { bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; bool support_derivative_control{}; + bool support_geometry_shader_passthrough{}; bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h index 6933750aa..bd6c2bfb5 100644 --- a/src/shader_recompiler/program_header.h +++ b/src/shader_recompiler/program_header.h @@ -37,7 +37,9 @@ struct ProgramHeader { BitField<15, 1, u32> kills_pixels; BitField<16, 1, u32> does_global_store; BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; + BitField<21, 2, u32> reserved1; + BitField<24, 1, u32> geometry_passthrough; + BitField<25, 1, u32> reserved2; BitField<26, 1, u32> does_load_or_store; BitField<27, 1, u32> does_fp64; BitField<28, 4, u32> stream_out_mask; @@ -79,24 +81,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } imap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } imap_generic_vector[16]; + std::array imap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // ImapColor union { @@ -122,24 +110,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } omap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } omap_generic_vector[16]; + std::array omap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // OmapColor @@ -157,18 +131,24 @@ struct ProgramHeader { INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - [[nodiscard]] bool IsInputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return imap_generic_vector[index >> 1].first != 0; - } - return imap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array InputGeneric(size_t index) const noexcept { + const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } - [[nodiscard]] bool IsOutputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return omap_generic_vector[index >> 1].first != 0; - } - return omap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array OutputGeneric(size_t index) const noexcept { + const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } } vtg; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 63fe2afaf..f3f83a258 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -10,6 +10,7 @@ #include #include "common/common_types.h" +#include "shader_recompiler/varying_state.h" namespace Shader { @@ -60,7 +61,7 @@ struct TransformFeedbackVarying { struct RuntimeInfo { std::array generic_input_types{}; - std::bitset<32> previous_stage_stores_generic{}; + VaryingState previous_stage_stores; bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index a20e15d2e..4ef4dbd40 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/varying_state.h" #include #include @@ -44,11 +45,6 @@ enum class Interpolation { NoPerspective, }; -struct InputVarying { - Interpolation interpolation{Interpolation::Smooth}; - bool used{false}; -}; - struct ConstantBufferDescriptor { u32 index; u32 count; @@ -121,18 +117,10 @@ struct Info { bool uses_subgroup_shuffles{}; std::array uses_patches{}; - std::array input_generics{}; - bool loads_primitive_id{}; - bool loads_position{}; - bool loads_color_front_diffuse{}; - bool loads_fixed_fnc_textures{}; - bool loads_point_coord{}; - bool loads_instance_id{}; - bool loads_vertex_id{}; - bool loads_front_face{}; - bool loads_legacy_varyings{}; - - bool loads_tess_coord{}; + std::array interpolation{}; + VaryingState loads; + VaryingState stores; + VaryingState passthrough; bool loads_indexed_attributes{}; @@ -140,21 +128,6 @@ struct Info { bool stores_sample_mask{}; bool stores_frag_depth{}; - std::bitset<32> stores_generics{}; - bool stores_layer{}; - bool stores_viewport_index{}; - bool stores_point_size{}; - bool stores_position{}; - bool stores_color_front_diffuse{}; - bool stores_color_front_specular{}; - bool stores_color_back_diffuse{}; - bool stores_color_back_specular{}; - bool stores_fixed_fnc_textures{}; - bool stores_clip_distance{}; - bool stores_fog_coordinate{}; - bool stores_viewport_mask{}; - bool stores_legacy_varyings{}; - bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h new file mode 100644 index 000000000..9d7b24a76 --- /dev/null +++ b/src/shader_recompiler/varying_state.h @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "shader_recompiler/frontend/ir/attribute.h" + +namespace Shader { + +struct VaryingState { + std::bitset<256> mask{}; + + void Set(IR::Attribute attribute, bool state = true) { + mask[static_cast(attribute)] = state; + } + + [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept { + return mask[static_cast(attribute)]; + } + + [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept { + return mask[static_cast(base) + 0] || mask[static_cast(base) + 1] || + mask[static_cast(base) + 2] || mask[static_cast(base) + 3]; + } + + [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept { + return mask[static_cast(base) + 0] && mask[static_cast(base) + 1] && + mask[static_cast(base) + 2] && mask[static_cast(base) + 3]; + } + + [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept { + return AnyComponent(base) == AllComponents(base); + } + + [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept { + return mask[static_cast(IR::Attribute::Generic0X) + index * 4 + component]; + } + + [[nodiscard]] bool Generic(size_t index) const noexcept { + return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3); + } + + [[nodiscard]] bool ClipDistances() const noexcept { + return AnyComponent(IR::Attribute::ClipDistance0) || + AnyComponent(IR::Attribute::ClipDistance4); + } + + [[nodiscard]] bool Legacy() const noexcept { + return AnyComponent(IR::Attribute::ColorFrontDiffuseR) || + AnyComponent(IR::Attribute::ColorFrontSpecularR) || + AnyComponent(IR::Attribute::ColorBackDiffuseR) || + AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture(); + } + + [[nodiscard]] bool FixedFunctionTexture() const noexcept { + for (size_t index = 0; index < 10; ++index) { + if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + return true; + } + } + return false; + } +}; + +} // namespace Shader diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index da2ded671..471d5686a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -961,7 +961,11 @@ public: SamplerIndex sampler_index; - INSERT_PADDING_WORDS_NOINIT(0x25); + INSERT_PADDING_WORDS_NOINIT(0x2); + + std::array gp_passthrough_mask; + + INSERT_PADDING_WORDS_NOINIT(0x1B); u32 depth_test_enable; @@ -1628,6 +1632,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); ASSERT_REG_POSITION(zeta_depth, 0x48c); ASSERT_REG_POSITION(sampler_index, 0x48D); +ASSERT_REG_POSITION(gp_passthrough_mask, 0x490); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5af9b7745..06e39a503 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -61,10 +61,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + // Mark all stores as available for vertex shaders + info.previous_stage_stores.mask.set(); } switch (program.stage) { case Shader::Stage::VertexB: @@ -187,6 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), + .support_geometry_shader_passthrough = false, // TODO .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 70e183e65..6d664ed6b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -487,10 +487,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector vertex_binding_divisors; static_vector vertex_attributes; if (key.state.dynamic_vertex_input) { - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const u32 type = key.state.DynamicAttributeType(index); - if (!input_attributes[index].used || type == 0) { + if (!stage_infos[0].loads.Generic(index) || type == 0) { continue; } vertex_attributes.push_back({ @@ -526,10 +525,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }); } } - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { + if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ec06b124f..7aaa40ef2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -123,18 +123,21 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde return Shader::AttributeType::Disabled; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(std::span programs, + const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program, const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; + if (previous_program->is_geometry_passthrough) { + info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; + } } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + info.previous_stage_stores.mask.set(); } const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; + const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { @@ -302,6 +305,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .support_derivative_control = true, + .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), @@ -518,7 +522,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; + const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index d463e2b56..429cab30d 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 4; +constexpr u32 CACHE_VERSION = 5; constexpr size_t INST_SIZE = sizeof(u64); @@ -155,6 +155,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.write(reinterpret_cast(&gp_passthrough_mask), + sizeof(gp_passthrough_mask)); + } } } @@ -202,6 +206,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -319,6 +324,9 @@ void FileEnvironment::Deserialize(std::ifstream& file) { .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.read(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.read(reinterpret_cast(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); + } } } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7b184d2f8..da4721e6b 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -350,6 +350,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); } + if (!nv_geometry_shader_passthrough) { + LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -768,6 +772,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); + test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, + true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index a9c0a0e4d..d0adc0127 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -194,6 +194,11 @@ public: return nv_viewport_array2; } + /// Returns true if the device supports VK_NV_geometry_shader_passthrough. + bool IsNvGeometryShaderPassthroughSupported() const { + return nv_geometry_shader_passthrough; + } + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -363,6 +368,7 @@ private: bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. + bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. -- cgit v1.2.3 From 57a8921e01a90ff5993079dd638a6c48e5781756 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Jun 2021 05:21:51 -0300 Subject: vk_graphics_pipeline: Implement line width --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 9 +++++++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 1 + src/video_core/renderer_vulkan/vk_state_tracker.cpp | 14 ++++++++++---- src/video_core/renderer_vulkan/vk_state_tracker.h | 7 ++++++- src/video_core/vulkan_common/vulkan_device.cpp | 4 ++-- src/video_core/vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 5 +++++ 8 files changed, 36 insertions(+), 8 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6d664ed6b..3363a6877 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -705,11 +705,12 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - static_vector dynamic_states{ + static_vector dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_LINE_WIDTH, }; if (key.state.extended_dynamic_state) { static constexpr std::array extended{ diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f04c3394c..bb7301c53 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -541,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateBlendConstants(regs); UpdateDepthBounds(regs); UpdateStencilFaces(regs); + UpdateLineWidth(regs); if (device.IsExtExtendedDynamicStateSupported()) { UpdateCullMode(regs); UpdateDepthBoundsTestEnable(regs); @@ -676,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } } +void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchLineWidth()) { + return; + } + const float width = regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased; + scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); }); +} + void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchCullMode()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c954fa7f8..866827247 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -125,6 +125,7 @@ private: void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 0ebe0473f..e3b7dd61c 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,10 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, - DepthBounds, StencilProperties, CullMode, DepthBoundsEnable, - DepthTestEnable, DepthWriteEnable, DepthCompareOp, FrontFace, - StencilOp, StencilTestEnable, VertexBuffers, VertexInput, + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, + StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable, + DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, + VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -86,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) { table[OFF(stencil_back_func_mask)] = StencilProperties; } +void SetupDirtyLineWidth(Tables& tables) { + tables[0][OFF(line_width_smooth)] = LineWidth; + tables[0][OFF(line_width_aliased)] = LineWidth; +} + void SetupDirtyCullMode(Tables& tables) { auto& table = tables[0]; table[OFF(cull_face)] = CullMode; @@ -180,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyBlendConstants(tables); SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); + SetupDirtyLineWidth(tables); SetupDirtyCullMode(tables); SetupDirtyDepthBoundsEnable(tables); SetupDirtyDepthTestEnable(tables); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 1976b7e9b..5f78f6950 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -31,6 +31,7 @@ enum : u8 { BlendConstants, DepthBounds, StencilProperties, + LineWidth, CullMode, DepthBoundsEnable, @@ -44,7 +45,7 @@ enum : u8 { Blending, ViewportSwizzles, - Last + Last, }; static_assert(Last <= std::numeric_limits::max()); @@ -93,6 +94,10 @@ public: return Exchange(Dirty::StencilProperties, false); } + bool TouchLineWidth() const { + return Exchange(Dirty::LineWidth, false); + } + bool TouchCullMode() { return Exchange(Dirty::CullMode, false); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index da4721e6b..912e03c5c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -227,7 +227,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .depthBiasClamp = true, .fillModeNonSolid = true, .depthBounds = is_depth_bounds_supported, - .wideLines = false, + .wideLines = true, .largePoints = true, .alphaToOne = false, .multiViewport = true, @@ -703,7 +703,6 @@ void Device::CheckSuitability(bool requires_swapchain) const { const std::array feature_report{ std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), - std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, "independentBlend"), std::make_pair(features.depthClamp, "depthClamp"), @@ -712,6 +711,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.multiViewport, "multiViewport"), std::make_pair(features.depthBiasClamp, "depthBiasClamp"), std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), + std::make_pair(features.wideLines, "wideLines"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.sampleRateShading, "sampleRateShading"), diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index d7e9fac22..bbf0fccae 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -121,6 +121,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetDepthTestEnableEXT); X(vkCmdSetDepthWriteEnableEXT); X(vkCmdSetFrontFaceEXT); + X(vkCmdSetLineWidth); X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index d43b606f1..d76bb4324 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -231,6 +231,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; PFN_vkCmdSetEvent vkCmdSetEvent{}; PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; + PFN_vkCmdSetLineWidth vkCmdSetLineWidth{}; PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; PFN_vkCmdSetScissor vkCmdSetScissor{}; PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; @@ -1198,6 +1199,10 @@ public: dld->vkCmdSetFrontFaceEXT(handle, front_face); } + void SetLineWidth(float line_width) const noexcept { + dld->vkCmdSetLineWidth(handle, line_width); + } + void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); } -- cgit v1.2.3 From f94f0be5215369a6985247ad936d9d9f43c9b140 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Jun 2021 05:25:19 -0300 Subject: vk_graphics_pipeline: Implement smooth lines --- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 +++++++++++ src/video_core/vulkan_common/vulkan_device.cpp | 41 +++++++++++++++++++--- src/video_core/vulkan_common/vulkan_device.h | 6 ++++ 5 files changed, 65 insertions(+), 5 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 7563dc462..d089da8a4 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -88,6 +88,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); + smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 66b57b636..c9be37935 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -195,6 +195,7 @@ struct FixedPipelineState { BitField<11, 1, u32> y_negate; BitField<12, 1, u32> provoking_vertex_last; BitField<13, 1, u32> conservative_raster_enable; + BitField<14, 1, u32> smooth_lines; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3363a6877..f0ae0b0d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -80,6 +80,14 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); } +bool IsLine(VkPrimitiveTopology topology) { + static constexpr std::array line_topologies{ + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, + // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT, + }; + return std::ranges::find(line_topologies, topology) == line_topologies.end(); +} + VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { union Swizzle { u32 raw; @@ -616,6 +624,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationLineStateCreateInfoEXT line_state{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .lineRasterizationMode = key.state.smooth_lines != 0 + ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT + : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT, + .stippledLineEnable = VK_FALSE, // TODO + .lineStippleFactor = 0, + .lineStipplePattern = 0, + }; VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, .pNext = nullptr, @@ -632,6 +650,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, }; + if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) { + line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state); + } if (device.IsExtConservativeRasterizationSupported()) { conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 912e03c5c..4fa1470e8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -416,6 +416,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + if (ext_line_rasterization) { + line_raster = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT, + .pNext = nullptr, + .rectangularLines = VK_TRUE, + .bresenhamLines = VK_FALSE, + .smoothLines = VK_TRUE, + .stippledRectangularLines = VK_FALSE, + .stippledBresenhamLines = VK_FALSE, + .stippledSmoothLines = VK_FALSE, + }; + SetNext(next, line_raster); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines"); + } + if (!ext_conservative_rasterization) { LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); } @@ -757,6 +774,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_shader_atomic_int64{}; bool has_ext_provoking_vertex{}; bool has_ext_vertex_input_dynamic_state{}; + bool has_ext_line_rasterization{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -798,6 +816,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); + test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -918,17 +937,29 @@ std::vector Device::LoadExtensions(bool requires_surface) { } } if (has_ext_extended_dynamic_state) { - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; - dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - dynamic_state.pNext = nullptr; - features.pNext = &dynamic_state; + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state; + extended_dynamic_state.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + extended_dynamic_state.pNext = nullptr; + features.pNext = &extended_dynamic_state; physical.GetFeatures2KHR(features); - if (dynamic_state.extendedDynamicState) { + if (extended_dynamic_state.extendedDynamicState) { extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); ext_extended_dynamic_state = true; } } + if (has_ext_line_rasterization) { + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT; + line_raster.pNext = nullptr; + features.pNext = &line_raster; + physical.GetFeatures2KHR(features); + if (line_raster.rectangularLines && line_raster.smoothLines) { + extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME); + ext_line_rasterization = true; + } + } if (has_khr_workgroup_memory_explicit_layout) { VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; layout.sType = diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index d0adc0127..26100166f 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -259,6 +259,11 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_line_rasterization. + bool IsExtLineRasterizationSupported() const { + return ext_line_rasterization; + } + /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. bool IsExtVertexInputDynamicStateSupported() const { return ext_vertex_input_dynamic_state; @@ -382,6 +387,7 @@ private: bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. -- cgit v1.2.3 From 57171b23f9da0d9fa4b07bb77ba5c8ed0083a792 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 29 Jun 2021 18:58:32 -0300 Subject: vulkan_device: Enable VK_EXT_extended_dynamic_state on RADV 21.2 onward --- src/video_core/vulkan_common/vulkan_device.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 4fa1470e8..e297a3e92 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -511,10 +511,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectToolingInfo(); if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { - LOG_WARNING( - Render_Vulkan, - "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); - ext_extended_dynamic_state = false; + // Mask driver version variant + const u32 version = (properties.driverVersion << 3) >> 3; + if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { + LOG_WARNING(Render_Vulkan, + "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state"); + ext_extended_dynamic_state = false; + } } if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); -- cgit v1.2.3 From 7277d7fe96d53ae2b73491d91e0a54caf0206fe7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 3 Jul 2021 01:49:59 -0400 Subject: vulkan_device: Blacklist ampere devices from float16 math --- src/video_core/vulkan_common/vulkan_device.cpp | 29 ++++++++++++++++++-------- src/video_core/vulkan_common/vulkan_device.h | 6 +++--- 2 files changed, 23 insertions(+), 12 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e297a3e92..7d66a43e7 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -194,12 +194,22 @@ std::unordered_map GetFormatProperties(vk::Physica return format_properties; } +std::vector GetSupportedExtensions(vk::PhysicalDevice physical) { + const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); + std::vector supported_extensions(std::size(extensions)); + for (const auto& extension : extensions) { + supported_extensions.emplace_back(extension.extensionName); + } + return supported_extensions; +} + } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - format_properties{GetFormatProperties(physical)} { + supported_extensions{GetSupportedExtensions(physical)}, + format_properties(GetFormatProperties(physical)) { CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); @@ -510,6 +520,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectTelemetryParameters(); CollectToolingInfo(); + if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR && is_float16_supported) { + if (std::ranges::find(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != + supported_extensions.end()) { + LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); + is_float16_supported = false; + } + } if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { // Mask driver version variant const u32 version = (properties.driverVersion << 3) >> 3; @@ -778,10 +795,10 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_provoking_vertex{}; bool has_ext_vertex_input_dynamic_state{}; bool has_ext_line_rasterization{}; - for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { + for (const std::string& extension : supported_extensions) { const auto test = [&](std::optional> status, const char* name, bool push) { - if (extension.extensionName != std::string_view(name)) { + if (extension != name) { return; } if (push) { @@ -1064,12 +1081,6 @@ void Device::CollectTelemetryParameters() { driver_id = driver.driverID; vendor_name = driver.driverName; - - const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - reported_extensions.reserve(std::size(extensions)); - for (const auto& extension : extensions) { - reported_extensions.emplace_back(extension.extensionName); - } } void Device::CollectPhysicalMemoryInfo() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 26100166f..df394e384 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -301,7 +301,7 @@ public: /// Returns the list of available extensions. const std::vector& GetAvailableExtensions() const { - return reported_extensions; + return supported_extensions; } u64 GetDeviceLocalMemory() const { @@ -398,8 +398,8 @@ private: bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Telemetry parameters - std::string vendor_name; ///< Device's driver name. - std::vector reported_extensions; ///< Reported Vulkan extensions. + std::string vendor_name; ///< Device's driver name. + std::vector supported_extensions; ///< Reported Vulkan extensions. /// Format properties dictionary. std::unordered_map format_properties; -- cgit v1.2.3 From 55233c2861a72bd777b75bce20c8d4e46c17a72f Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 3 Jul 2021 03:07:50 -0400 Subject: vulkan_device: Add missing include algorithm --- src/video_core/vulkan_common/vulkan_device.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7d66a43e7..ceaee8a7e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include #include -- cgit v1.2.3 From be54aad1c40bb50c71e7bcd6465c2fd372c11cb7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 7 Jul 2021 00:18:30 -0300 Subject: maxwell_to_vk: Add R16_SNORM --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 8f9b9a11a..68a23b602 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -157,7 +157,7 @@ struct FormatTuple { {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM - {VK_FORMAT_UNDEFINED}, // R16_SNORM + {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT {VK_FORMAT_UNDEFINED}, // R16_SINT {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index ceaee8a7e..13d938434 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -116,6 +116,7 @@ std::unordered_map GetFormatProperties(vk::Physica VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16_UNORM, + VK_FORMAT_R16_SNORM, VK_FORMAT_R16_UINT, VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_R8G8_UNORM, -- cgit v1.2.3 From f6796cad9c4259aa13aab1f8b2e27392e07432b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 21:51:10 -0300 Subject: vulkan_device: Blacklist Volta and older from VK_KHR_push_descriptor Causes crashes on Link's Awakening intro. It's hard to debug if it's our fault due to bugs in validation layers. --- src/video_core/vulkan_common/vulkan_device.cpp | 43 +++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'src/video_core/vulkan_common') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 13d938434..44afdc1cd 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -34,6 +34,12 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{ }; } // namespace Alternatives +enum class NvidiaArchitecture { + AmpereOrNewer, + Turing, + VoltaOrOlder, +}; + constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_MAINTENANCE1_EXTENSION_NAME, VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, @@ -198,13 +204,34 @@ std::unordered_map GetFormatProperties(vk::Physica std::vector GetSupportedExtensions(vk::PhysicalDevice physical) { const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - std::vector supported_extensions(std::size(extensions)); + std::vector supported_extensions; + supported_extensions.reserve(extensions.size()); for (const auto& extension : extensions) { supported_extensions.emplace_back(extension.extensionName); } return supported_extensions; } +NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, + std::span exts) { + if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) { + VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; + shading_rate_props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + VkPhysicalDeviceProperties2KHR physical_properties{}; + physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + physical_properties.pNext = &shading_rate_props; + physical.GetProperties2KHR(physical_properties); + if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { + // Only Ampere and newer support this feature + return NvidiaArchitecture::AmpereOrNewer; + } + } + if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) { + return NvidiaArchitecture::Turing; + } + return NvidiaArchitecture::VoltaOrOlder; +} } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, @@ -522,11 +549,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectTelemetryParameters(); CollectToolingInfo(); - if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR && is_float16_supported) { - if (std::ranges::find(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != - supported_extensions.end()) { + if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { + const auto arch = GetNvidiaArchitecture(physical, supported_extensions); + switch (arch) { + case NvidiaArchitecture::AmpereOrNewer: LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); is_float16_supported = false; + break; + case NvidiaArchitecture::Turing: + break; + case NvidiaArchitecture::VoltaOrOlder: + LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor"); + khr_push_descriptor = false; + break; } } if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { -- cgit v1.2.3