From 6e2ca7fbee8b7de964926da392ab0550b2a76475 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 18:50:48 -0300 Subject: buffer_cache: Simplify clear logic Use existing helper functions and avoid looping when only one buffer has to be active. --- src/video_core/buffer_cache/buffer_cache.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7373cb62d..5a0b6f0c0 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -553,13 +553,9 @@ bool BufferCache

::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { ClearDownload(subtract_interval); common_ranges.subtract(subtract_interval); - BufferId buffer; - do { - has_deleted_buffers = false; - buffer = FindBuffer(*cpu_dst_address, static_cast(size)); - } while (has_deleted_buffers); + const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast(size)); auto& dest_buffer = slot_buffers[buffer]; - const u32 offset = static_cast(*cpu_dst_address - dest_buffer.CpuAddr()); + const u32 offset = dest_buffer.Offset(*cpu_dst_address); runtime.ClearBuffer(dest_buffer, offset, size, value); return true; } -- cgit v1.2.3 From a0c4557557172b3139e3b53a42e31338900037c6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 18:51:42 -0300 Subject: gl_buffer_cache: Use glClearNamedBufferSubData:GL_RED instead of GL_RGBA Avoids reading out of bounds from the stack. --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c4189fb60..a02a45e04 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -100,7 +100,7 @@ void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast(offset), - static_cast(size / sizeof(u32)), GL_RGBA, GL_UNSIGNED_INT, + static_cast(size / sizeof(u32)), GL_RED, GL_UNSIGNED_INT, &value); } -- cgit v1.2.3 From 2930dccecc933d6748772e9f51a5724fe1e6771b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Feb 2021 02:54:35 -0300 Subject: spirv: Initial SPIR-V support --- externals/sirit | 2 +- src/shader_recompiler/CMakeLists.txt | 15 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 134 + src/shader_recompiler/backend/spirv/emit_spirv.h | 314 +- .../spirv/emit_spirv_bitwise_conversion.cpp | 57 + .../backend/spirv/emit_spirv_composite.cpp | 105 + .../backend/spirv/emit_spirv_context_get_set.cpp | 102 + .../backend/spirv/emit_spirv_control_flow.cpp | 30 + .../backend/spirv/emit_spirv_floating_point.cpp | 220 ++ .../backend/spirv/emit_spirv_integer.cpp | 132 + .../backend/spirv/emit_spirv_logical.cpp | 89 + .../backend/spirv/emit_spirv_memory.cpp | 125 + .../backend/spirv/emit_spirv_select.cpp | 25 + .../backend/spirv/emit_spirv_undefined.cpp | 29 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 12 +- src/shader_recompiler/frontend/ir/opcodes.inc | 12 +- .../frontend/maxwell/translate/translate.cpp | 10 +- .../ir_opt/identity_removal_pass.cpp | 2 +- src/shader_recompiler/main.cpp | 21 +- .../renderer_vulkan/vk_shader_decompiler.cpp | 3166 -------------------- .../renderer_vulkan/vk_shader_decompiler.h | 99 - 21 files changed, 1401 insertions(+), 3300 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_select.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_shader_decompiler.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_shader_decompiler.h (limited to 'src/video_core') diff --git a/externals/sirit b/externals/sirit index eefca56af..1f7b70730 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit eefca56afd49379bdebc97ded8b480839f930881 +Subproject commit 1f7b70730d610cfbd5099ab93dd38ec8a78e7e35 diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 248e90d4b..12fbcb37c 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,5 +1,16 @@ add_executable(shader_recompiler + backend/spirv/emit_spirv.cpp backend/spirv/emit_spirv.h + backend/spirv/emit_spirv_bitwise_conversion.cpp + backend/spirv/emit_spirv_composite.cpp + backend/spirv/emit_spirv_context_get_set.cpp + backend/spirv/emit_spirv_control_flow.cpp + backend/spirv/emit_spirv_floating_point.cpp + backend/spirv/emit_spirv_integer.cpp + backend/spirv/emit_spirv_logical.cpp + backend/spirv/emit_spirv_memory.cpp + backend/spirv/emit_spirv_select.cpp + backend/spirv/emit_spirv_undefined.cpp environment.h exception.h file_environment.cpp @@ -72,7 +83,9 @@ add_executable(shader_recompiler main.cpp object_pool.h ) -target_link_libraries(shader_recompiler PRIVATE fmt::fmt) + +target_include_directories(video_core PRIVATE sirit) +target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) if (MSVC) target_compile_options(shader_recompiler PRIVATE diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp new file mode 100644 index 000000000..7c4269fad --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -0,0 +1,134 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::Backend::SPIRV { + +EmitContext::EmitContext(IR::Program& program) { + AddCapability(spv::Capability::Shader); + AddCapability(spv::Capability::Float16); + AddCapability(spv::Capability::Float64); + void_id = TypeVoid(); + + u1 = Name(TypeBool(), "u1"); + f32.Define(*this, TypeFloat(32), "f32"); + u32.Define(*this, TypeInt(32, false), "u32"); + f16.Define(*this, TypeFloat(16), "f16"); + f64.Define(*this, TypeFloat(64), "f64"); + + for (const IR::Function& function : program.functions) { + for (IR::Block* const block : function.blocks) { + block_label_map.emplace_back(block, OpLabel()); + } + } + std::ranges::sort(block_label_map, {}, &std::pair::first); +} + +EmitContext::~EmitContext() = default; + +EmitSPIRV::EmitSPIRV(IR::Program& program) { + EmitContext ctx{program}; + const Id void_function{ctx.TypeFunction(ctx.void_id)}; + // FIXME: Forward declare functions (needs sirit support) + Id func{}; + for (IR::Function& function : program.functions) { + func = ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function); + for (IR::Block* const block : function.blocks) { + ctx.AddLabel(ctx.BlockLabel(block)); + for (IR::Inst& inst : block->Instructions()) { + EmitInst(ctx, &inst); + } + } + ctx.OpFunctionEnd(); + } + ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main"); + + std::vector result{ctx.Assemble()}; + std::FILE* file{std::fopen("shader.spv", "wb")}; + std::fwrite(result.data(), sizeof(u32), result.size(), file); + std::fclose(file); + std::system("spirv-dis shader.spv"); + std::system("spirv-val shader.spv"); +} + +template +static void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { + using M = decltype(method); + using std::is_invocable_r_v; + if constexpr (is_invocable_r_v) { + ctx.Define(inst, (emit.*method)(ctx)); + } else if constexpr (is_invocable_r_v) { + ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)))); + } else if constexpr (is_invocable_r_v) { + ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)))); + } else if constexpr (is_invocable_r_v) { + ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), + ctx.Def(inst->Arg(2)))); + } else if constexpr (is_invocable_r_v) { + ctx.Define(inst, (emit.*method)(ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)))); + } else if constexpr (is_invocable_r_v) { + ctx.Define(inst, (emit.*method)(ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), + ctx.Def(inst->Arg(2)))); + } else if constexpr (is_invocable_r_v) { + ctx.Define(inst, (emit.*method)(ctx, ctx.Def(inst->Arg(0)), inst->Arg(1).U32())); + } else if constexpr (is_invocable_r_v) { + ctx.Define(inst, (emit.*method)(ctx, inst->Arg(0))); + } else if constexpr (is_invocable_r_v) { + ctx.Define(inst, (emit.*method)(ctx, inst->Arg(0), inst->Arg(1))); + } else if constexpr (is_invocable_r_v) { + (emit.*method)(ctx, inst); + } else if constexpr (is_invocable_r_v) { + (emit.*method)(ctx); + } else { + static_assert(false, "Bad format"); + } +} + +void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->Opcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&EmitSPIRV::Emit##name>(*this, ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->Opcode()); +} + +void EmitSPIRV::EmitPhi(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitVoid(EmitContext&) {} + +void EmitSPIRV::EmitIdentity(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSPIRV::EmitGetSignFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSPIRV::EmitGetCarryFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitSPIRV::EmitGetOverflowFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 99cc8e08a..3f4b68a7d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -4,18 +4,326 @@ #pragma once +#include + +#include + +#include "common/common_types.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { +using Sirit::Id; + +class DefMap { +public: + void Define(IR::Inst* inst, Id def_id) { + const InstInfo info{.use_count{inst->UseCount()}, .def_id{def_id}}; + const auto it{map.insert(map.end(), std::make_pair(inst, info))}; + if (it == map.end()) { + throw LogicError("Defining already defined instruction"); + } + } + + [[nodiscard]] Id Consume(IR::Inst* inst) { + const auto it{map.find(inst)}; + if (it == map.end()) { + throw LogicError("Consuming undefined instruction"); + } + const Id def_id{it->second.def_id}; + if (--it->second.use_count == 0) { + map.erase(it); + } + return def_id; + } + +private: + struct InstInfo { + int use_count; + Id def_id; + }; + + boost::container::flat_map map; +}; + +class VectorTypes { +public: + void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { + defs[0] = sirit_ctx.Name(base_type, name); + + std::array def_name; + for (int i = 1; i < 4; ++i) { + const std::string_view def_name_view( + def_name.data(), + fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); + defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); + } + } + + [[nodiscard]] Id operator[](size_t size) const noexcept { + return defs[size - 1]; + } + +private: + std::array defs; +}; + +class EmitContext final : public Sirit::Module { +public: + explicit EmitContext(IR::Program& program); + ~EmitContext(); + + [[nodiscard]] Id Def(const IR::Value& value) { + if (!value.IsImmediate()) { + return def_map.Consume(value.Inst()); + } + switch (value.Type()) { + case IR::Type::U32: + return Constant(u32[1], value.U32()); + case IR::Type::F32: + return Constant(f32[1], value.F32()); + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } + } + + void Define(IR::Inst* inst, Id def_id) { + def_map.Define(inst, def_id); + } + + [[nodiscard]] Id BlockLabel(IR::Block* block) const { + const auto it{std::ranges::lower_bound(block_label_map, block, {}, + &std::pair::first)}; + if (it == block_label_map.end()) { + throw LogicError("Undefined block"); + } + return it->second; + } + + Id void_id{}; + Id u1{}; + VectorTypes f32; + VectorTypes u32; + VectorTypes f16; + VectorTypes f64; + + Id workgroup_id{}; + Id local_invocation_id{}; + +private: + DefMap def_map; + std::vector> block_label_map; +}; + class EmitSPIRV { public: + explicit EmitSPIRV(IR::Program& program); + private: + void EmitInst(EmitContext& ctx, IR::Inst* inst); + // Microinstruction emitters -#define OPCODE(name, result_type, ...) void Emit##name(EmitContext& ctx, IR::Inst* inst); -#include "shader_recompiler/frontend/ir/opcodes.inc" -#undef OPCODE + void EmitPhi(EmitContext& ctx); + void EmitVoid(EmitContext& ctx); + void EmitIdentity(EmitContext& ctx); + void EmitBranch(EmitContext& ctx, IR::Inst* inst); + void EmitBranchConditional(EmitContext& ctx, IR::Inst* inst); + void EmitExit(EmitContext& ctx); + void EmitReturn(EmitContext& ctx); + void EmitUnreachable(EmitContext& ctx); + void EmitGetRegister(EmitContext& ctx); + void EmitSetRegister(EmitContext& ctx); + void EmitGetPred(EmitContext& ctx); + void EmitSetPred(EmitContext& ctx); + Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); + void EmitGetAttribute(EmitContext& ctx); + void EmitSetAttribute(EmitContext& ctx); + void EmitGetAttributeIndexed(EmitContext& ctx); + void EmitSetAttributeIndexed(EmitContext& ctx); + void EmitGetZFlag(EmitContext& ctx); + void EmitGetSFlag(EmitContext& ctx); + void EmitGetCFlag(EmitContext& ctx); + void EmitGetOFlag(EmitContext& ctx); + void EmitSetZFlag(EmitContext& ctx); + void EmitSetSFlag(EmitContext& ctx); + void EmitSetCFlag(EmitContext& ctx); + void EmitSetOFlag(EmitContext& ctx); + Id EmitWorkgroupId(EmitContext& ctx); + Id EmitLocalInvocationId(EmitContext& ctx); + void EmitUndef1(EmitContext& ctx); + void EmitUndef8(EmitContext& ctx); + void EmitUndef16(EmitContext& ctx); + void EmitUndef32(EmitContext& ctx); + void EmitUndef64(EmitContext& ctx); + void EmitLoadGlobalU8(EmitContext& ctx); + void EmitLoadGlobalS8(EmitContext& ctx); + void EmitLoadGlobalU16(EmitContext& ctx); + void EmitLoadGlobalS16(EmitContext& ctx); + void EmitLoadGlobal32(EmitContext& ctx); + void EmitLoadGlobal64(EmitContext& ctx); + void EmitLoadGlobal128(EmitContext& ctx); + void EmitWriteGlobalU8(EmitContext& ctx); + void EmitWriteGlobalS8(EmitContext& ctx); + void EmitWriteGlobalU16(EmitContext& ctx); + void EmitWriteGlobalS16(EmitContext& ctx); + void EmitWriteGlobal32(EmitContext& ctx); + void EmitWriteGlobal64(EmitContext& ctx); + void EmitWriteGlobal128(EmitContext& ctx); + void EmitLoadStorageU8(EmitContext& ctx); + void EmitLoadStorageS8(EmitContext& ctx); + void EmitLoadStorageU16(EmitContext& ctx); + void EmitLoadStorageS16(EmitContext& ctx); + Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); + void EmitLoadStorage64(EmitContext& ctx); + void EmitLoadStorage128(EmitContext& ctx); + void EmitWriteStorageU8(EmitContext& ctx); + void EmitWriteStorageS8(EmitContext& ctx); + void EmitWriteStorageU16(EmitContext& ctx); + void EmitWriteStorageS16(EmitContext& ctx); + void EmitWriteStorage32(EmitContext& ctx); + void EmitWriteStorage64(EmitContext& ctx); + void EmitWriteStorage128(EmitContext& ctx); + void EmitCompositeConstructU32x2(EmitContext& ctx); + void EmitCompositeConstructU32x3(EmitContext& ctx); + void EmitCompositeConstructU32x4(EmitContext& ctx); + void EmitCompositeExtractU32x2(EmitContext& ctx); + Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); + void EmitCompositeExtractU32x4(EmitContext& ctx); + void EmitCompositeConstructF16x2(EmitContext& ctx); + void EmitCompositeConstructF16x3(EmitContext& ctx); + void EmitCompositeConstructF16x4(EmitContext& ctx); + void EmitCompositeExtractF16x2(EmitContext& ctx); + void EmitCompositeExtractF16x3(EmitContext& ctx); + void EmitCompositeExtractF16x4(EmitContext& ctx); + void EmitCompositeConstructF32x2(EmitContext& ctx); + void EmitCompositeConstructF32x3(EmitContext& ctx); + void EmitCompositeConstructF32x4(EmitContext& ctx); + void EmitCompositeExtractF32x2(EmitContext& ctx); + void EmitCompositeExtractF32x3(EmitContext& ctx); + void EmitCompositeExtractF32x4(EmitContext& ctx); + void EmitCompositeConstructF64x2(EmitContext& ctx); + void EmitCompositeConstructF64x3(EmitContext& ctx); + void EmitCompositeConstructF64x4(EmitContext& ctx); + void EmitCompositeExtractF64x2(EmitContext& ctx); + void EmitCompositeExtractF64x3(EmitContext& ctx); + void EmitCompositeExtractF64x4(EmitContext& ctx); + void EmitSelect8(EmitContext& ctx); + void EmitSelect16(EmitContext& ctx); + void EmitSelect32(EmitContext& ctx); + void EmitSelect64(EmitContext& ctx); + void EmitBitCastU16F16(EmitContext& ctx); + Id EmitBitCastU32F32(EmitContext& ctx, Id value); + void EmitBitCastU64F64(EmitContext& ctx); + void EmitBitCastF16U16(EmitContext& ctx); + Id EmitBitCastF32U32(EmitContext& ctx, Id value); + void EmitBitCastF64U64(EmitContext& ctx); + void EmitPackUint2x32(EmitContext& ctx); + void EmitUnpackUint2x32(EmitContext& ctx); + void EmitPackFloat2x16(EmitContext& ctx); + void EmitUnpackFloat2x16(EmitContext& ctx); + void EmitPackDouble2x32(EmitContext& ctx); + void EmitUnpackDouble2x32(EmitContext& ctx); + void EmitGetZeroFromOp(EmitContext& ctx); + void EmitGetSignFromOp(EmitContext& ctx); + void EmitGetCarryFromOp(EmitContext& ctx); + void EmitGetOverflowFromOp(EmitContext& ctx); + void EmitFPAbs16(EmitContext& ctx); + void EmitFPAbs32(EmitContext& ctx); + void EmitFPAbs64(EmitContext& ctx); + Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); + Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); + Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); + void EmitFPMax32(EmitContext& ctx); + void EmitFPMax64(EmitContext& ctx); + void EmitFPMin32(EmitContext& ctx); + void EmitFPMin64(EmitContext& ctx); + Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + void EmitFPNeg16(EmitContext& ctx); + void EmitFPNeg32(EmitContext& ctx); + void EmitFPNeg64(EmitContext& ctx); + void EmitFPRecip32(EmitContext& ctx); + void EmitFPRecip64(EmitContext& ctx); + void EmitFPRecipSqrt32(EmitContext& ctx); + void EmitFPRecipSqrt64(EmitContext& ctx); + void EmitFPSqrt(EmitContext& ctx); + void EmitFPSin(EmitContext& ctx); + void EmitFPSinNotReduced(EmitContext& ctx); + void EmitFPExp2(EmitContext& ctx); + void EmitFPExp2NotReduced(EmitContext& ctx); + void EmitFPCos(EmitContext& ctx); + void EmitFPCosNotReduced(EmitContext& ctx); + void EmitFPLog2(EmitContext& ctx); + void EmitFPSaturate16(EmitContext& ctx); + void EmitFPSaturate32(EmitContext& ctx); + void EmitFPSaturate64(EmitContext& ctx); + void EmitFPRoundEven16(EmitContext& ctx); + void EmitFPRoundEven32(EmitContext& ctx); + void EmitFPRoundEven64(EmitContext& ctx); + void EmitFPFloor16(EmitContext& ctx); + void EmitFPFloor32(EmitContext& ctx); + void EmitFPFloor64(EmitContext& ctx); + void EmitFPCeil16(EmitContext& ctx); + void EmitFPCeil32(EmitContext& ctx); + void EmitFPCeil64(EmitContext& ctx); + void EmitFPTrunc16(EmitContext& ctx); + void EmitFPTrunc32(EmitContext& ctx); + void EmitFPTrunc64(EmitContext& ctx); + Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); + void EmitIAdd64(EmitContext& ctx); + Id EmitISub32(EmitContext& ctx, Id a, Id b); + void EmitISub64(EmitContext& ctx); + Id EmitIMul32(EmitContext& ctx, Id a, Id b); + void EmitINeg32(EmitContext& ctx); + void EmitIAbs32(EmitContext& ctx); + Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); + void EmitShiftRightLogical32(EmitContext& ctx); + void EmitShiftRightArithmetic32(EmitContext& ctx); + void EmitBitwiseAnd32(EmitContext& ctx); + void EmitBitwiseOr32(EmitContext& ctx); + void EmitBitwiseXor32(EmitContext& ctx); + void EmitBitFieldInsert(EmitContext& ctx); + void EmitBitFieldSExtract(EmitContext& ctx); + Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); + void EmitSLessThan(EmitContext& ctx); + void EmitULessThan(EmitContext& ctx); + void EmitIEqual(EmitContext& ctx); + void EmitSLessThanEqual(EmitContext& ctx); + void EmitULessThanEqual(EmitContext& ctx); + void EmitSGreaterThan(EmitContext& ctx); + void EmitUGreaterThan(EmitContext& ctx); + void EmitINotEqual(EmitContext& ctx); + void EmitSGreaterThanEqual(EmitContext& ctx); + Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); + void EmitLogicalOr(EmitContext& ctx); + void EmitLogicalAnd(EmitContext& ctx); + void EmitLogicalXor(EmitContext& ctx); + void EmitLogicalNot(EmitContext& ctx); + void EmitConvertS16F16(EmitContext& ctx); + void EmitConvertS16F32(EmitContext& ctx); + void EmitConvertS16F64(EmitContext& ctx); + void EmitConvertS32F16(EmitContext& ctx); + void EmitConvertS32F32(EmitContext& ctx); + void EmitConvertS32F64(EmitContext& ctx); + void EmitConvertS64F16(EmitContext& ctx); + void EmitConvertS64F32(EmitContext& ctx); + void EmitConvertS64F64(EmitContext& ctx); + void EmitConvertU16F16(EmitContext& ctx); + void EmitConvertU16F32(EmitContext& ctx); + void EmitConvertU16F64(EmitContext& ctx); + void EmitConvertU32F16(EmitContext& ctx); + void EmitConvertU32F32(EmitContext& ctx); + void EmitConvertU32F64(EmitContext& ctx); + void EmitConvertU64F16(EmitContext& ctx); + void EmitConvertU64F32(EmitContext& ctx); + void EmitConvertU64F64(EmitContext& ctx); + void EmitConvertU64U32(EmitContext& ctx); + void EmitConvertU32U64(EmitContext& ctx); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp new file mode 100644 index 000000000..447df5b8c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -0,0 +1,57 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitBitCastU16F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitBitCastU32F32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.u32[1], value); +} + +void EmitSPIRV::EmitBitCastU64F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitCastF16U16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitBitCastF32U32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.f32[1], value); +} + +void EmitSPIRV::EmitBitCastF64U64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitPackUint2x32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUnpackUint2x32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitPackFloat2x16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUnpackFloat2x16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitPackDouble2x32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUnpackDouble2x32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp new file mode 100644 index 000000000..b190cf876 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitCompositeConstructU32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructU32x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructU32x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractU32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { + return ctx.OpCompositeExtract(ctx.u32[1], vector, index); +} + +void EmitSPIRV::EmitCompositeExtractU32x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF16x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF16x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF16x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF16x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF32x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF32x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF32x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF32x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF64x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeConstructF64x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF64x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF64x3(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitCompositeExtractF64x4(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp new file mode 100644 index 000000000..b121305ea --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -0,0 +1,102 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitGetRegister(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetRegister(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetPred(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetPred(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Constant buffer indexing"); + } + if (!offset.IsImmediate()) { + throw NotImplementedException("Variable constant buffer offset"); + } + return ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_cbuf"); +} + +void EmitSPIRV::EmitGetAttribute(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetAttribute(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetAttributeIndexed(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetAttributeIndexed(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetZFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetSFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetCFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitGetOFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetZFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetSFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetCFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSetOFlag(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitWorkgroupId(EmitContext& ctx) { + if (ctx.workgroup_id.value == 0) { + ctx.workgroup_id = ctx.AddGlobalVariable( + ctx.TypePointer(spv::StorageClass::Input, ctx.u32[3]), spv::StorageClass::Input); + ctx.Decorate(ctx.workgroup_id, spv::Decoration::BuiltIn, spv::BuiltIn::WorkgroupId); + } + return ctx.OpLoad(ctx.u32[3], ctx.workgroup_id); +} + +Id EmitSPIRV::EmitLocalInvocationId(EmitContext& ctx) { + if (ctx.local_invocation_id.value == 0) { + ctx.local_invocation_id = ctx.AddGlobalVariable( + ctx.TypePointer(spv::StorageClass::Input, ctx.u32[3]), spv::StorageClass::Input); + ctx.Decorate(ctx.local_invocation_id, spv::Decoration::BuiltIn, + spv::BuiltIn::LocalInvocationId); + } + return ctx.OpLoad(ctx.u32[3], ctx.local_invocation_id); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp new file mode 100644 index 000000000..770fe113c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) { + ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label())); +} + +void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) { + ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()), + ctx.BlockLabel(inst->Arg(2).Label())); +} + +void EmitSPIRV::EmitExit(EmitContext& ctx) { + ctx.OpReturn(); +} + +void EmitSPIRV::EmitReturn(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUnreachable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp new file mode 100644 index 000000000..9c39537e2 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -0,0 +1,220 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { + const auto flags{inst->Flags()}; + if (flags.no_contraction) { + ctx.Decorate(op, spv::Decoration::NoContraction); + } + switch (flags.rounding) { + case IR::FpRounding::RN: + break; + case IR::FpRounding::RM: + ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); + break; + case IR::FpRounding::RP: + ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTP); + break; + case IR::FpRounding::RZ: + ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); + break; + } + if (flags.fmz_mode != IR::FmzMode::FTZ) { + throw NotImplementedException("Denorm management not implemented"); + } + return op; +} + +} // Anonymous namespace + +void EmitSPIRV::EmitFPAbs16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPAbs32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPAbs64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.f16[1], a, b)); +} + +Id EmitSPIRV::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.f32[1], a, b)); +} + +Id EmitSPIRV::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.f64[1], a, b)); +} + +Id EmitSPIRV::EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.f16[1], a, b, c)); +} + +Id EmitSPIRV::EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.f32[1], a, b, c)); +} + +Id EmitSPIRV::EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.f64[1], a, b, c)); +} + +void EmitSPIRV::EmitFPMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.f16[1], a, b)); +} + +Id EmitSPIRV::EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.f32[1], a, b)); +} + +Id EmitSPIRV::EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.f64[1], a, b)); +} + +void EmitSPIRV::EmitFPNeg16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPNeg32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPNeg64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecip32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecip64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecipSqrt32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecipSqrt64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSqrt(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSin(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSinNotReduced(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPExp2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPExp2NotReduced(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCos(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCosNotReduced(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPLog2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSaturate16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSaturate32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSaturate64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRoundEven16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRoundEven32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRoundEven64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPFloor16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPFloor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPFloor64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCeil16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCeil32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCeil64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPTrunc16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPTrunc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPTrunc64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp new file mode 100644 index 000000000..3ef4f3d78 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -0,0 +1,132 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +Id EmitSPIRV::EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + if (inst->HasAssociatedPseudoOperation()) { + throw NotImplementedException("Pseudo-operations on IAdd32"); + } + return ctx.OpIAdd(ctx.u32[1], a, b); +} + +void EmitSPIRV::EmitIAdd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitISub32(EmitContext& ctx, Id a, Id b) { + return ctx.OpISub(ctx.u32[1], a, b); +} + +void EmitSPIRV::EmitISub64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitIMul32(EmitContext& ctx, Id a, Id b) { + return ctx.OpIMul(ctx.u32[1], a, b); +} + +void EmitSPIRV::EmitINeg32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitIAbs32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { + return ctx.OpShiftLeftLogical(ctx.u32[1], base, shift); +} + +void EmitSPIRV::EmitShiftRightLogical32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitShiftRightArithmetic32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitwiseAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitwiseOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitwiseXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitFieldInsert(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitBitFieldSExtract(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { + return ctx.OpBitFieldUExtract(ctx.u32[1], base, offset, count); +} + +void EmitSPIRV::EmitSLessThan(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitULessThan(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitIEqual(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSLessThanEqual(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitULessThanEqual(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSGreaterThan(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUGreaterThan(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitINotEqual(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSGreaterThanEqual(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThanEqual(ctx.u1, lhs, rhs); +} + +void EmitSPIRV::EmitLogicalOr(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLogicalAnd(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLogicalXor(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLogicalNot(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp new file mode 100644 index 000000000..7b43c4ed8 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -0,0 +1,89 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitConvertS16F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS16F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS16F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS32F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS32F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS32F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS64F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS64F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertS64F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU16F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU16F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU16F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU32F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU32F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU32F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU64F16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU64F32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU64F64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU64U32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitConvertU32U64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp new file mode 100644 index 000000000..21a0d72fa --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -0,0 +1,125 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitLoadGlobalU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobalS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobalU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobalS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobal32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobal64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadGlobal128(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobalU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobalS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobalU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobalS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobal32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobal64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteGlobal128(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadStorageU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadStorageS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadStorageU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadStorageS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Storage buffer indexing"); + } + return ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_sbuf"); +} + +void EmitSPIRV::EmitLoadStorage64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitLoadStorage128(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteStorageU8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteStorageS8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteStorageU16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteStorageS16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx) { + ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_sbuf_store"); +} + +void EmitSPIRV::EmitWriteStorage64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitWriteStorage128(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp new file mode 100644 index 000000000..40a856f72 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -0,0 +1,25 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitSelect8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSelect16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSelect32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitSelect64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp new file mode 100644 index 000000000..3850b072c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -0,0 +1,29 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitUndef1(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUndef8(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUndef16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUndef32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitUndef64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 9d7dc034c..ada0be834 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -130,27 +130,27 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { } U32 IREmitter::WorkgroupIdX() { - return Inst(Opcode::WorkgroupIdX); + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)}; } U32 IREmitter::WorkgroupIdY() { - return Inst(Opcode::WorkgroupIdY); + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)}; } U32 IREmitter::WorkgroupIdZ() { - return Inst(Opcode::WorkgroupIdZ); + return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; } U32 IREmitter::LocalInvocationIdX() { - return Inst(Opcode::LocalInvocationIdX); + return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; } U32 IREmitter::LocalInvocationIdY() { - return Inst(Opcode::LocalInvocationIdY); + return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)}; } U32 IREmitter::LocalInvocationIdZ() { - return Inst(Opcode::LocalInvocationIdZ); + return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; } U32 IREmitter::LoadGlobalU8(const U64& address) { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 82b04f37c..5dc65f2df 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -21,9 +21,9 @@ OPCODE(GetPred, U1, Pred OPCODE(SetPred, Void, Pred, U1, ) OPCODE(GetCbuf, U32, U32, U32, ) OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, U32, Attribute, ) +OPCODE(SetAttribute, Void, Attribute, U32, ) OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, U32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, U32, ) OPCODE(GetZFlag, U1, Void, ) OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) @@ -32,12 +32,8 @@ OPCODE(SetZFlag, Void, U1, OPCODE(SetSFlag, Void, U1, ) OPCODE(SetCFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, ) -OPCODE(WorkgroupIdX, U32, ) -OPCODE(WorkgroupIdY, U32, ) -OPCODE(WorkgroupIdZ, U32, ) -OPCODE(LocalInvocationIdX, U32, ) -OPCODE(LocalInvocationIdY, U32, ) -OPCODE(LocalInvocationIdZ, U32, ) +OPCODE(WorkgroupId, U32x3, ) +OPCODE(LocalInvocationId, U32x3, ) // Undefined OPCODE(Undef1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index dcc3f6c0e..7e6bb07a2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -11,15 +11,15 @@ namespace Shader::Maxwell { -template +template static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { - using MethodType = decltype(visitor_method); + using MethodType = decltype(method); if constexpr (std::is_invocable_r_v) { - (visitor.*visitor_method)(pc, insn); + (visitor.*method)(pc, insn); } else if constexpr (std::is_invocable_r_v) { - (visitor.*visitor_method)(insn); + (visitor.*method)(insn); } else { - (visitor.*visitor_method)(); + (visitor.*method)(); } } diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 39a972919..593efde39 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -13,7 +13,7 @@ namespace Shader::Optimization { void IdentityRemovalPass(IR::Function& function) { std::vector to_invalidate; - for (auto& block : function.blocks) { + for (IR::Block* const block : function.blocks) { for (auto inst = block->begin(); inst != block->end();) { const size_t num_args{inst->NumArgs()}; for (size_t i = 0; i < num_args; ++i) { diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 19e36590c..9887e066d 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -6,6 +6,7 @@ #include +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/file_environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -51,18 +52,18 @@ void RunDatabase() { int main() { // RunDatabase(); - // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; - FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS15C2FB1F0B965767.bin"}; - auto cfg{std::make_unique(env, 0)}; - // fmt::print(stdout, "{}\n", cfg->Dot()); - auto inst_pool{std::make_unique>()}; auto block_pool{std::make_unique>()}; - for (int i = 0; i < 8192 * 4; ++i) { - void(inst_pool->Create(IR::Opcode::Void, 0)); + // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; + FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS15C2FB1F0B965767.bin"}; + for (int i = 0; i < 1; ++i) { + block_pool->ReleaseContents(); + inst_pool->ReleaseContents(); + auto cfg{std::make_unique(env, 0)}; + // fmt::print(stdout, "{}\n", cfg->Dot()); + IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, *cfg)}; + // fmt::print(stdout, "{}\n", IR::DumpProgram(program)); + Backend::SPIRV::EmitSPIRV spirv{program}; } - - IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, *cfg)}; - fmt::print(stdout, "{}\n", IR::DumpProgram(program)); } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp deleted file mode 100644 index c6846d886..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ /dev/null @@ -1,3166 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Vulkan { - -namespace { - -using Sirit::Id; -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -// TODO(Rodrigo): Use rasterizer's value -constexpr u32 MaxConstBufferFloats = 0x4000; -constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4; - -constexpr u32 NumInputPatches = 32; // This value seems to be the standard - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -class Expression final { -public: - Expression(Id id_, Type type_) : id{id_}, type{type_} { - ASSERT(type_ != Type::Void); - } - Expression() : type{Type::Void} {} - - Id id{}; - Type type{}; -}; -static_assert(std::is_standard_layout_v); - -struct TexelBuffer { - Id image_type{}; - Id image{}; -}; - -struct SampledImage { - Id image_type{}; - Id sampler_type{}; - Id sampler_pointer_type{}; - Id variable{}; -}; - -struct StorageImage { - Id image_type{}; - Id image{}; -}; - -struct AttributeType { - Type type; - Id scalar; - Id vector; -}; - -struct VertexIndices { - std::optional position; - std::optional layer; - std::optional viewport; - std::optional point_size; - std::optional clip_distances; -}; - -struct GenericVaryingDescription { - Id id = nullptr; - u32 first_element = 0; - bool is_scalar = false; -}; - -spv::Dim GetSamplerDim(const SamplerEntry& sampler) { - ASSERT(!sampler.is_buffer); - switch (sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return spv::Dim::Dim1D; - case Tegra::Shader::TextureType::Texture2D: - return spv::Dim::Dim2D; - case Tegra::Shader::TextureType::Texture3D: - return spv::Dim::Dim3D; - case Tegra::Shader::TextureType::TextureCube: - return spv::Dim::Cube; - default: - UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type); - return spv::Dim::Dim2D; - } -} - -std::pair GetImageDim(const ImageEntry& image) { - switch (image.type) { - case Tegra::Shader::ImageType::Texture1D: - return {spv::Dim::Dim1D, false}; - case Tegra::Shader::ImageType::TextureBuffer: - return {spv::Dim::Buffer, false}; - case Tegra::Shader::ImageType::Texture1DArray: - return {spv::Dim::Dim1D, true}; - case Tegra::Shader::ImageType::Texture2D: - return {spv::Dim::Dim2D, false}; - case Tegra::Shader::ImageType::Texture2DArray: - return {spv::Dim::Dim2D, true}; - case Tegra::Shader::ImageType::Texture3D: - return {spv::Dim::Dim3D, false}; - default: - UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type); - return {spv::Dim::Dim2D, false}; - } -} - -/// Returns the number of vertices present in a primitive topology. -u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) { - switch (primitive_topology) { - case Maxwell::PrimitiveTopology::Points: - return 1; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return 2; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return 3; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return 4; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return 6; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return 3; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return 3; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return 3; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return 3; - default: - UNREACHABLE(); - return 3; - } -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) { - switch (primitive) { - case Maxwell::TessellationPrimitive::Isolines: - return spv::ExecutionMode::Isolines; - case Maxwell::TessellationPrimitive::Triangles: - return spv::ExecutionMode::Triangles; - case Maxwell::TessellationPrimitive::Quads: - return spv::ExecutionMode::Quads; - } - UNREACHABLE(); - return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) { - switch (spacing) { - case Maxwell::TessellationSpacing::Equal: - return spv::ExecutionMode::SpacingEqual; - case Maxwell::TessellationSpacing::FractionalOdd: - return spv::ExecutionMode::SpacingFractionalOdd; - case Maxwell::TessellationSpacing::FractionalEven: - return spv::ExecutionMode::SpacingFractionalEven; - } - UNREACHABLE(); - return spv::ExecutionMode::SpacingEqual; -} - -spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) { - switch (input_topology) { - case Maxwell::PrimitiveTopology::Points: - return spv::ExecutionMode::InputPoints; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - return spv::ExecutionMode::InputLines; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return spv::ExecutionMode::InputLinesAdjacency; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return spv::ExecutionMode::InputTrianglesAdjacency; - case Maxwell::PrimitiveTopology::Quads: - UNIMPLEMENTED_MSG("Quads"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::QuadStrip: - UNIMPLEMENTED_MSG("QuadStrip"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Polygon: - UNIMPLEMENTED_MSG("Polygon"); - return spv::ExecutionMode::Triangles; - case Maxwell::PrimitiveTopology::Patches: - UNIMPLEMENTED_MSG("Patches"); - return spv::ExecutionMode::Triangles; - } - UNREACHABLE(); - return spv::ExecutionMode::Triangles; -} - -spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) { - switch (output_topology) { - case Tegra::Shader::OutputTopology::PointList: - return spv::ExecutionMode::OutputPoints; - case Tegra::Shader::OutputTopology::LineStrip: - return spv::ExecutionMode::OutputLineStrip; - case Tegra::Shader::OutputTopology::TriangleStrip: - return spv::ExecutionMode::OutputTriangleStrip; - default: - UNREACHABLE(); - return spv::ExecutionMode::OutputPoints; - } -} - -/// Returns true if an attribute index is one of the 32 generic attributes -constexpr bool IsGenericAttribute(Attribute::Index attribute) { - return attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31; -} - -/// Returns the location of a generic attribute -u32 GetGenericAttributeLocation(Attribute::Index attribute) { - ASSERT(IsGenericAttribute(attribute)); - return static_cast(attribute) - static_cast(Attribute::Index::Attribute_0); -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (std::holds_alternative(meta)) { - return std::get(meta).precise; - } - return false; -} - -class SPIRVDecompiler final : public Sirit::Module { -public: - explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_, - const Registry& registry_, const Specialization& specialization_) - : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, - registry{registry_}, specialization{specialization_} { - if (stage_ != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); - } - - AddCapability(spv::Capability::Shader); - AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); - AddCapability(spv::Capability::ImageQuery); - AddCapability(spv::Capability::Image1D); - AddCapability(spv::Capability::ImageBuffer); - AddCapability(spv::Capability::ImageGatherExtended); - AddCapability(spv::Capability::SampledBuffer); - AddCapability(spv::Capability::StorageImageWriteWithoutFormat); - AddCapability(spv::Capability::DrawParameters); - AddCapability(spv::Capability::SubgroupBallotKHR); - AddCapability(spv::Capability::SubgroupVoteKHR); - AddExtension("SPV_KHR_16bit_storage"); - AddExtension("SPV_KHR_shader_ballot"); - AddExtension("SPV_KHR_subgroup_vote"); - AddExtension("SPV_KHR_storage_buffer_storage_class"); - AddExtension("SPV_KHR_variable_pointers"); - AddExtension("SPV_KHR_shader_draw_parameters"); - - if (!transform_feedback.empty()) { - if (device.IsExtTransformFeedbackSupported()) { - AddCapability(spv::Capability::TransformFeedback); - } else { - LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " - "supported on this device"); - } - } - if (ir.UsesLayer() || ir.UsesViewportIndex()) { - if (ir.UsesViewportIndex()) { - AddCapability(spv::Capability::MultiViewport); - } - if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) { - AddExtension("SPV_EXT_shader_viewport_index_layer"); - AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); - } - } - if (device.IsFormatlessImageLoadSupported()) { - AddCapability(spv::Capability::StorageImageReadWithoutFormat); - } - if (device.IsFloat16Supported()) { - AddCapability(spv::Capability::Float16); - } - t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half"); - t_half = Name(TypeVector(t_scalar_half, 2), "half"); - - const Id main = Decompile(); - - switch (stage) { - case ShaderType::Vertex: - AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces); - break; - case ShaderType::TesselationControl: - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common2.threads_per_input_primitive); - break; - case ShaderType::TesselationEval: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Tessellation); - AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); - AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); - AddExecutionMode(main, info.tessellation_clockwise - ? spv::ExecutionMode::VertexOrderCw - : spv::ExecutionMode::VertexOrderCcw); - break; - } - case ShaderType::Geometry: { - const auto& info = registry.GetGraphicsInfo(); - AddCapability(spv::Capability::Geometry); - AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); - AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); - AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); - AddExecutionMode(main, spv::ExecutionMode::OutputVertices, - header.common4.max_output_vertices); - // TODO(Rodrigo): Where can we get this info from? - AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); - break; - } - case ShaderType::Fragment: - AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); - AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); - if (header.ps.omap.depth) { - AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); - } - if (specialization.early_fragment_tests) { - AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); - } - break; - case ShaderType::Compute: - const auto workgroup_size = specialization.workgroup_size; - AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], - workgroup_size[1], workgroup_size[2]); - AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces); - break; - } - } - -private: - Id Decompile() { - DeclareCommon(); - DeclareVertex(); - DeclareTessControl(); - DeclareTessEval(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareRegisters(); - DeclareCustomVariables(); - DeclarePredicates(); - DeclareLocalMemory(); - DeclareSharedMemory(); - DeclareInternalFlags(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - - u32 binding = specialization.base_binding; - binding = DeclareConstantBuffers(binding); - binding = DeclareGlobalBuffers(binding); - binding = DeclareUniformTexels(binding); - binding = DeclareSamplers(binding); - binding = DeclareStorageTexels(binding); - binding = DeclareImages(binding); - - const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); - AddLabel(); - - if (ir.IsDecompiled()) { - DeclareFlowVariables(); - DecompileAST(); - } else { - AllocateLabels(); - DecompileBranchMode(); - } - - OpReturn(); - OpFunctionEnd(); - - return main; - } - - void DefinePrologue() { - if (stage == ShaderType::Vertex) { - // Clear Position to avoid reading trash on the Z conversion. - const auto position_index = out_indices.position.value(); - const Id position = AccessElement(t_out_float4, out_vertex, position_index); - OpStore(position, v_varying_default); - - if (specialization.point_size) { - const u32 point_size_index = out_indices.point_size.value(); - const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index); - OpStore(out_point_size, Constant(t_float, *specialization.point_size)); - } - } - } - - void DecompileAST(); - - void DecompileBranchMode() { - const u32 first_address = ir.GetBasicBlocks().begin()->first; - const Id loop_label = OpLabel("loop"); - const Id merge_label = OpLabel("merge"); - const Id dummy_label = OpLabel(); - const Id jump_label = OpLabel(); - continue_label = OpLabel("continue"); - - std::vector literals; - std::vector branch_labels; - for (const auto& [literal, label] : labels) { - literals.push_back(literal); - branch_labels.push_back(label); - } - - jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint), - spv::StorageClass::Function, Constant(t_uint, first_address)); - AddLocalVariable(jmp_to); - - std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); - std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); - - Name(jmp_to, "jmp_to"); - Name(ssy_flow_stack, "ssy_flow_stack"); - Name(ssy_flow_stack_top, "ssy_flow_stack_top"); - Name(pbk_flow_stack, "pbk_flow_stack"); - Name(pbk_flow_stack_top, "pbk_flow_stack_top"); - - DefinePrologue(); - - OpBranch(loop_label); - AddLabel(loop_label); - OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); - OpBranch(dummy_label); - - AddLabel(dummy_label); - const Id default_branch = OpLabel(); - const Id jmp_to_load = OpLoad(t_uint, jmp_to); - OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone); - OpSwitch(jmp_to_load, default_branch, literals, branch_labels); - - AddLabel(default_branch); - OpReturn(); - - for (const auto& [address, bb] : ir.GetBasicBlocks()) { - AddLabel(labels.at(address)); - - VisitBasicBlock(bb); - - const auto next_it = labels.lower_bound(address + 1); - const Id next_label = next_it != labels.end() ? next_it->second : default_branch; - OpBranch(next_label); - } - - AddLabel(jump_label); - OpBranch(continue_label); - AddLabel(continue_label); - OpBranch(loop_label); - AddLabel(merge_label); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - static constexpr auto INTERNAL_FLAGS_COUNT = static_cast(InternalFlag::Amount); - - void AllocateLabels() { - for (const auto& pair : ir.GetBasicBlocks()) { - const u32 address = pair.first; - labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); - } - } - - void DeclareCommon() { - thread_id = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); - thread_masks[0] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); - thread_masks[1] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); - thread_masks[2] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); - thread_masks[3] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); - thread_masks[4] = - DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - - // Declare input attributes - vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index"); - instance_index = - DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index"); - base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex"); - base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance"); - } - - void DeclareTessControl() { - if (stage != ShaderType::TesselationControl) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertexArray(header.common2.threads_per_input_primitive); - - tess_level_outer = DeclareBuiltIn( - spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))), - "tess_level_outer"); - Decorate(tess_level_outer, spv::Decoration::Patch); - - tess_level_inner = DeclareBuiltIn( - spv::BuiltIn::TessLevelInner, spv::StorageClass::Output, - TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))), - "tess_level_inner"); - Decorate(tess_level_inner, spv::Decoration::Patch); - - invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id"); - } - - void DeclareTessEval() { - if (stage != ShaderType::TesselationEval) { - return; - } - DeclareInputVertexArray(NumInputPatches); - DeclareOutputVertex(); - - tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord"); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); - DeclareInputVertexArray(num_input); - DeclareOutputVertex(); - } - - void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - - for (u32 rt = 0; rt < static_cast(std::size(frag_colors)); ++rt) { - if (!IsRenderTargetEnabled(rt)) { - continue; - } - const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output)); - Name(id, fmt::format("frag_color{}", rt)); - Decorate(id, spv::Decoration::Location, rt); - - frag_colors[rt] = id; - interfaces.push_back(id); - } - - if (header.ps.omap.depth) { - frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output)); - Name(frag_depth, "frag_depth"); - Decorate(frag_depth, spv::Decoration::BuiltIn, - static_cast(spv::BuiltIn::FragDepth)); - - interfaces.push_back(frag_depth); - } - - frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord"); - front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing"); - point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord"); - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - - workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id"); - local_invocation_id = - DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id"); - } - - void DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("gpr_{}", gpr)); - registers.emplace(gpr, AddGlobalVariable(id)); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); - Name(id, fmt::format("custom_var_{}", i)); - custom_variables.emplace(i, AddGlobalVariable(id)); - } - } - - void DeclarePredicates() { - for (const auto pred : ir.GetPredicates()) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("pred_{}", static_cast(pred))); - predicates.emplace(pred, AddGlobalVariable(id)); - } - } - - void DeclareFlowVariables() { - for (u32 i = 0; i < ir.GetASTNumVariables(); i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", static_cast(i))); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - } - - void DeclareLocalMemory() { - // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at - // specialization time. - const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize(); - if (lmem_size == 0) { - return; - } - const auto element_count = static_cast(Common::AlignUp(lmem_size, 4) / 4); - const Id type_array = TypeArray(t_float, Constant(t_uint, element_count)); - const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array); - Name(type_pointer, "LocalMemory"); - - local_memory = - OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array)); - AddGlobalVariable(Name(local_memory, "local_memory")); - } - - void DeclareSharedMemory() { - if (stage != ShaderType::Compute) { - return; - } - t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); - - u32 smem_size = specialization.shared_memory_size * 4; - if (smem_size == 0) { - // Avoid declaring an empty array. - return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (smem_size > limit) { - LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", - smem_size, limit); - smem_size = limit; - } - - const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); - const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); - Name(type_pointer, "SharedMemory"); - - shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup); - AddGlobalVariable(Name(shared_memory, "shared_memory")); - } - - void DeclareInternalFlags() { - static constexpr std::array names{"zero", "sign", "carry", "overflow"}; - - for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); - } - } - - void DeclareInputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Input; - std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length); - } - - void DeclareOutputVertexArray(u32 length) { - constexpr auto storage = spv::StorageClass::Output; - std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length); - } - - std::tuple DeclareVertexArray(spv::StorageClass storage_class, - std::string name, u32 length) { - const auto [struct_id, indices] = DeclareVertexStruct(); - const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length)); - const Id vertex_ptr = TypePointer(storage_class, vertex_array); - const Id vertex = OpVariable(vertex_ptr, storage_class); - AddGlobalVariable(Name(vertex, std::move(name))); - interfaces.push_back(vertex); - return {indices, vertex}; - } - - void DeclareOutputVertex() { - Id out_vertex_struct; - std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); - const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); - out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output); - interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); - } - - void DeclareInputAttributes() { - for (const auto index : ir.GetInputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - const u32 location = GetGenericAttributeLocation(index); - if (!IsAttributeEnabled(location)) { - continue; - } - const auto type_descriptor = GetAttributeType(location); - Id type; - if (IsInputAttributeArray()) { - type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3); - type = TypeArray(type, Constant(t_uint, GetNumInputVertices())); - type = TypePointer(spv::StorageClass::Input, type); - } else { - type = type_descriptor.vector; - } - const Id id = OpVariable(type, spv::StorageClass::Input); - AddGlobalVariable(Name(id, fmt::format("in_attr{}", location))); - input_attributes.emplace(index, id); - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - - if (stage != ShaderType::Fragment) { - continue; - } - switch (header.ps.GetPixelImap(location)) { - case PixelImap::Constant: - Decorate(id, spv::Decoration::Flat); - break; - case PixelImap::Perspective: - // Default - break; - case PixelImap::ScreenLinear: - Decorate(id, spv::Decoration::NoPerspective); - break; - default: - UNREACHABLE_MSG("Unused attribute being fetched"); - } - } - } - - void DeclareOutputAttributes() { - if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { - return; - } - - UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); - for (const auto index : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(index)) { - continue; - } - DeclareOutputAttribute(index); - } - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - - const u32 location = GetGenericAttributeLocation(index); - u8 element = 0; - while (element < 4) { - const std::size_t remainder = 4 - element; - - std::size_t num_components = remainder; - const std::optional tfb = GetTransformFeedbackInfo(index, element); - if (tfb) { - num_components = tfb->components; - } - - Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); - Id varying_default = v_varying_default; - if (IsOutputAttributeArray()) { - const u32 num = GetNumOutputVertices(); - type = TypeArray(type, Constant(t_uint, num)); - if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { - // Intel's proprietary driver fails to setup defaults for arrayed output - // attributes. - varying_default = ConstantComposite(type, std::vector(num, varying_default)); - } - } - type = TypePointer(spv::StorageClass::Output, type); - - std::string name = fmt::format("out_attr{}", location); - if (num_components < 4 || element > 0) { - name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); - } - - const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); - Name(AddGlobalVariable(id), name); - - GenericVaryingDescription description; - description.id = id; - description.first_element = element; - description.is_scalar = num_components == 1; - for (u32 i = 0; i < num_components; ++i) { - const u8 offset = static_cast(static_cast(index) * 4 + element + i); - output_attributes.emplace(offset, description); - } - interfaces.push_back(id); - - Decorate(id, spv::Decoration::Location, location); - if (element > 0) { - Decorate(id, spv::Decoration::Component, static_cast(element)); - } - if (tfb && device.IsExtTransformFeedbackSupported()) { - Decorate(id, spv::Decoration::XfbBuffer, static_cast(tfb->buffer)); - Decorate(id, spv::Decoration::XfbStride, static_cast(tfb->stride)); - Decorate(id, spv::Decoration::Offset, static_cast(tfb->offset)); - } - - element = static_cast(static_cast(element) + num_components); - } - } - - std::optional GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - return it->second; - } - - u32 DeclareConstantBuffers(u32 binding) { - for (const auto& [index, size] : ir.GetConstantBuffers()) { - const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo - : t_cbuf_std140_ubo; - const Id id = OpVariable(type, spv::StorageClass::Uniform); - AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - constant_buffers.emplace(index, id); - } - return binding; - } - - u32 DeclareGlobalBuffers(u32 binding) { - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); - AddGlobalVariable( - Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - global_buffers.emplace(base, id); - } - return binding; - } - - u32 DeclareUniformTexels(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (!sampler.is_buffer) { - continue; - } - ASSERT(!sampler.is_array); - ASSERT(!sampler.is_shadow); - - constexpr auto dim = spv::Dim::Buffer; - constexpr int depth = 0; - constexpr int arrayed = 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id}); - } - return binding; - } - - u32 DeclareSamplers(u32 binding) { - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - continue; - } - const auto dim = GetSamplerDim(sampler); - const int depth = sampler.is_shadow ? 1 : 0; - const int arrayed = sampler.is_array ? 1 : 0; - constexpr bool ms = false; - constexpr int sampled = 1; - constexpr auto format = spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); - const Id sampler_type = TypeSampledImage(image_type); - const Id sampler_pointer_type = - TypePointer(spv::StorageClass::UniformConstant, sampler_type); - const Id type = sampler.is_indexed - ? TypeArray(sampler_type, Constant(t_uint, sampler.size)) - : sampler_type; - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - - sampled_images.emplace( - sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id}); - } - return binding; - } - - u32 DeclareStorageTexels(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type != Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - u32 DeclareImages(u32 binding) { - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - continue; - } - DeclareImage(image, binding); - } - return binding; - } - - void DeclareImage(const ImageEntry& image, u32& binding) { - const auto [dim, arrayed] = GetImageDim(image); - constexpr int depth = 0; - constexpr bool ms = false; - constexpr int sampled = 2; // This won't be accessed with a sampler - const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown; - const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); - const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); - const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); - AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); - - Decorate(id, spv::Decoration::Binding, binding++); - Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); - if (image.is_read && !image.is_written) { - Decorate(id, spv::Decoration::NonWritable); - } else if (image.is_written && !image.is_read) { - Decorate(id, spv::Decoration::NonReadable); - } - - images.emplace(image.index, StorageImage{image_type, id}); - } - - bool IsRenderTargetEnabled(u32 rt) const { - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(rt, component)) { - return true; - } - } - return false; - } - - bool IsInputAttributeArray() const { - return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval || - stage == ShaderType::Geometry; - } - - bool IsOutputAttributeArray() const { - return stage == ShaderType::TesselationControl; - } - - bool IsAttributeEnabled(u32 location) const { - return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; - } - - u32 GetNumInputVertices() const { - switch (stage) { - case ShaderType::Geometry: - return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); - case ShaderType::TesselationControl: - case ShaderType::TesselationEval: - return NumInputPatches; - default: - UNREACHABLE(); - return 1; - } - } - - u32 GetNumOutputVertices() const { - switch (stage) { - case ShaderType::TesselationControl: - return header.common2.threads_per_input_primitive; - default: - UNREACHABLE(); - return 1; - } - } - - std::tuple DeclareVertexStruct() { - struct BuiltIn { - Id type; - spv::BuiltIn builtin; - const char* name; - }; - std::vector members; - members.reserve(4); - - const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) { - const auto index = static_cast(members.size()); - members.push_back(BuiltIn{type, builtin, name}); - return index; - }; - - VertexIndices indices; - indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position"); - - if (ir.UsesLayer()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer"); - } else { - LOG_ERROR( - Render_Vulkan, - "Shader requires Layer but it's not supported on this stage with this device."); - } - } - - if (ir.UsesViewportIndex()) { - if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { - indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index"); - } else { - LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on " - "this stage with this device."); - } - } - - if (ir.UsesPointSize() || specialization.point_size) { - indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size"); - } - - const auto& ir_output_attributes = ir.GetOutputAttributes(); - const bool declare_clip_distances = std::any_of( - ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) { - return index == Attribute::Index::ClipDistances0123 || - index == Attribute::Index::ClipDistances4567; - }); - if (declare_clip_distances) { - indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)), - spv::BuiltIn::ClipDistance, "clip_distances"); - } - - std::vector member_types; - member_types.reserve(members.size()); - for (std::size_t i = 0; i < members.size(); ++i) { - member_types.push_back(members[i].type); - } - const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex"); - Decorate(per_vertex_struct, spv::Decoration::Block); - - for (std::size_t index = 0; index < members.size(); ++index) { - const auto& member = members[index]; - MemberName(per_vertex_struct, static_cast(index), member.name); - MemberDecorate(per_vertex_struct, static_cast(index), spv::Decoration::BuiltIn, - static_cast(member.builtin)); - } - - return {per_vertex_struct, indices}; - } - - void VisitBasicBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - const auto operation_index = static_cast(operation->GetCode()); - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {v_float_zero, Type::Float}; - } - return {OpLoad(t_float, registers.at(index)), Type::Float}; - } - - if (const auto cv = std::get_if(&*node)) { - const u32 index = cv->GetIndex(); - return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; - } - - if (const auto immediate = std::get_if(&*node)) { - return {Constant(t_uint, immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if(&*node)) { - const auto value = [&]() -> Id { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return v_true; - case Tegra::Shader::Pred::NeverExecute: - return v_false; - default: - return OpLoad(t_bool, predicates.at(index)); - } - }(); - if (predicate->IsNegated()) { - return {OpLogicalNot(t_bool, value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if(&*node)) { - const auto attribute = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const auto& buffer = abuf->GetBuffer(); - - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector indices) { - std::vector members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsInputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - switch (attribute) { - case Attribute::Index::Position: { - if (stage == ShaderType::Fragment) { - return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), - Type::Float}; - } - const std::vector elements = {in_indices.position.value(), element}; - return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float}; - } - case Attribute::Index::PointCoord: { - switch (element) { - case 0: - case 1: - return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element), - Type::Float}; - } - UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element); - return {v_float_zero, Type::Float}; - } - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - switch (element) { - case 0: - case 1: - return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), - Type::Float}; - case 2: - return { - OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)), - Type::Int}; - case 3: - return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)), - Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {Constant(t_uint, 0U), Type::Uint}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - if (element == 3) { - const Id is_front_facing = OpLoad(t_bool, front_facing); - const Id true_value = Constant(t_int, static_cast(-1)); - const Id false_value = Constant(t_int, 0); - return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {v_float_zero, Type::Float}; - default: - if (!IsGenericAttribute(attribute)) { - break; - } - const u32 location = GetGenericAttributeLocation(attribute); - if (!IsAttributeEnabled(location)) { - // Disabled attributes (also known as constant attributes) always return zero. - return {v_float_zero, Type::Float}; - } - const auto type_descriptor = GetAttributeType(location); - const Type type = type_descriptor.type; - const Id attribute_id = input_attributes.at(attribute); - const std::vector elements = {element}; - const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); - return {OpLoad(GetTypeDefinition(type), pointer), type}; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {v_float_zero, Type::Float}; - } - - if (const auto cbuf = std::get_if(&*node)) { - const Node& offset = cbuf->GetOffset(); - const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); - - Id pointer{}; - if (device.IsKhrUniformBufferStandardLayoutSupported()) { - const Id buffer_offset = - OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U)); - pointer = - OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset); - } else { - Id buffer_index{}; - Id buffer_element{}; - if (const auto immediate = std::get_if(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT(offset_imm % 4 == 0); - buffer_index = Constant(t_uint, offset_imm / 16); - buffer_element = Constant(t_uint, (offset_imm / 4) % 4); - } else if (std::holds_alternative(*offset)) { - // Indirect access - const Id offset_id = AsUint(Visit(offset)); - const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4)); - const Id final_offset = - OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1)); - buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4)); - buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4)); - } else { - UNREACHABLE_MSG("Unmanaged offset node type"); - } - pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index, - buffer_element); - } - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto gmem = std::get_if(&*node)) { - return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; - } - - if (const auto lmem = std::get_if(&*node)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_prv_float, local_memory, address); - return {OpLoad(t_float, pointer), Type::Float}; - } - - if (const auto smem = std::get_if(&*node)) { - return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; - } - - if (const auto internal_flag = std::get_if(&*node)) { - const Id flag = internal_flags.at(static_cast(internal_flag->GetFlag())); - return {OpLoad(t_bool, flag), Type::Bool}; - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; - ASSERT(type == Type::Void); - } - // It's invalid to call conditional on nested nodes, use an operation instead - const Id true_label = OpLabel(); - const Id skip_label = OpLabel(); - const Id condition = AsBool(Visit(conditional->GetCondition())); - OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone); - OpBranchConditional(condition, true_label, skip_label); - AddLabel(true_label); - - conditional_branch_set = true; - inside_branch = false; - VisitBasicBlock(conditional->GetCode()); - conditional_branch_set = false; - if (!inside_branch) { - OpBranch(skip_label); - } else { - inside_branch = false; - } - AddLabel(skip_label); - return {}; - } - - if (const auto comment = std::get_if(&*node)) { - if (device.HasDebuggingToolAttached()) { - // We should insert comments with OpString instead of using named variables - Name(OpUndef(t_int), comment->GetText()); - } - return {}; - } - - UNREACHABLE(); - return {}; - } - - template - Expression Unary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - - const Id value = (this->*func)(type_def, op_a); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Binary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - - const Id value = (this->*func)(type_def, op_a, op_b); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Ternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - template - Expression Quaternary(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - const Id op_a = As(Visit(operation[0]), type_a); - const Id op_b = As(Visit(operation[1]), type_b); - const Id op_c = As(Visit(operation[2]), type_c); - const Id op_d = As(Visit(operation[3]), type_d); - - const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d); - if (IsPrecise(operation)) { - Decorate(value, spv::Decoration::NoContraction); - } - return {value, result_type}; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target{}; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit its source - // because it might have side effects. - Visit(src); - return {}; - } - target = {registers.at(gpr->GetIndex()), Type::Float}; - - } else if (const auto abuf = std::get_if(&*dest)) { - const auto& buffer = abuf->GetBuffer(); - const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector indices) { - std::vector members; - members.reserve(std::size(indices) + 1); - - if (buffer && IsOutputAttributeArray()) { - members.push_back(AsUint(Visit(buffer))); - } - for (const u32 index : indices) { - members.push_back(Constant(t_uint, index)); - } - return OpAccessChain(pointer_type, composite, members); - }; - - target = [&]() -> Expression { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex(); attribute) { - case Attribute::Index::Position: { - const u32 index = out_indices.position.value(); - return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float}; - } - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 1: { - if (!out_indices.layer) { - return {}; - } - const u32 index = out_indices.layer.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 2: { - if (!out_indices.viewport) { - return {}; - } - const u32 index = out_indices.viewport.value(); - return {AccessElement(t_out_int, out_vertex, index), Type::Int}; - } - case 3: { - const auto index = out_indices.point_size.value(); - return {AccessElement(t_out_float, out_vertex, index), Type::Float}; - } - default: - UNIMPLEMENTED_MSG("LayerViewportPoint element={}", abuf->GetElement()); - return {}; - } - case Attribute::Index::ClipDistances0123: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element), Type::Float}; - } - case Attribute::Index::ClipDistances4567: { - const u32 index = out_indices.clip_distances.value(); - return {AccessElement(t_out_float, out_vertex, index, element + 4), - Type::Float}; - } - default: - if (IsGenericAttribute(attribute)) { - const u8 offset = static_cast(static_cast(attribute) * 4 + element); - const GenericVaryingDescription description = output_attributes.at(offset); - const Id composite = description.id; - std::vector indices; - if (!description.is_scalar) { - indices.push_back(element - description.first_element); - } - return {ArrayPass(t_out_float, composite, indices), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", - static_cast(attribute)); - return {}; - } - }(); - - } else if (const auto patch = std::get_if(&*dest)) { - target = [&]() -> Expression { - const u32 offset = patch->GetOffset(); - switch (offset) { - case 0: - case 1: - case 2: - case 3: - return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float}; - case 4: - case 5: - return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float}; - } - UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset); - return {}; - }(); - - } else if (const auto lmem = std::get_if(&*dest)) { - Id address = AsUint(Visit(lmem->GetAddress())); - address = OpUDiv(t_uint, address, Constant(t_uint, 4)); - target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; - - } else if (const auto smem = std::get_if(&*dest)) { - target = {GetSharedMemoryPointer(*smem), Type::Uint}; - - } else if (const auto gmem = std::get_if(&*dest)) { - target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; - - } else if (const auto cv = std::get_if(&*dest)) { - target = {custom_variables.at(cv->GetIndex()), Type::Float}; - - } else { - UNIMPLEMENTED(); - } - - if (!target.id) { - // On failure we return a nullptr target.id, skip these stores. - return {}; - } - - OpStore(target.id, As(Visit(src), target.type)); - return {}; - } - - template - Expression FCastHalf(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)), - Type::Float}; - } - - Expression FSwizzleAdd(Operation operation) { - const Id minus = Constant(t_float, -1.0f); - const Id plus = v_float_one; - const Id zero = v_float_zero; - const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero); - const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus); - - Id mask = OpLoad(t_uint, thread_id); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1)); - mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask); - mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); - - const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask); - const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask); - - const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a); - const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b); - return {OpFAdd(t_float, op_a, op_b), Type::Float}; - } - - Expression HNegate(Operation operation) { - const bool is_f16 = device.IsFloat16Supported(); - const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000); - const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000); - const auto GetNegate = [&](std::size_t index) { - return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one); - }; - const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2)); - return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const auto Pack = [&](std::size_t index) { - const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index]))); - return OpCompositeConstruct(t_half, scalar, scalar); - }; - const Id value = AsHalfFloat(Visit(operation[0])); - const Id min = Pack(1); - const Id max = Pack(2); - - const Id clamped = OpFClamp(t_half, value, min, max); - if (IsPrecise(operation)) { - Decorate(clamped, spv::Decoration::NoContraction); - } - return {clamped, Type::HalfFloat}; - } - - Expression HCastFloat(Operation operation) { - const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = Visit(operation[0]); - const auto type = std::get(operation.GetMeta()); - if (type == Tegra::Shader::HalfType::H0_H1) { - return operand; - } - const auto value = [&] { - switch (std::get(operation.GetMeta())) { - case Tegra::Shader::HalfType::F32: - return GetHalfScalarFromFloat(AsFloat(operand)); - case Tegra::Shader::HalfType::H0_H0: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0); - case Tegra::Shader::HalfType::H1_H1: - return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1); - default: - UNREACHABLE(); - return ConstantNull(t_half); - } - }(); - return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat}; - } - - Expression HMergeF32(Operation operation) { - const Id value = AsHalfFloat(Visit(operation[0])); - return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float}; - } - - template - Expression HMergeHN(Operation operation) { - const Id target = AsHalfFloat(Visit(operation[0])); - const Id source = AsHalfFloat(Visit(operation[1])); - const Id object = OpCompositeExtract(t_scalar_half, source, offset); - return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); - const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1]))); - return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat}; - } - - Expression LogicalAddCarry(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - const Id op_b = AsUint(Visit(operation[1])); - - const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); - const Id carry = OpCompositeExtract(t_uint, result, 1); - return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Id target{}; - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = predicates.at(index); - - } else if (const auto flag = std::get_if(&*dest)) { - target = internal_flags.at(static_cast(flag->GetFlag())); - } - - OpStore(target, AsBool(Visit(src))); - return {}; - } - - Expression LogicalFOrdered(Operation operation) { - // Emulate SPIR-V's OpOrdered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a); - const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b); - return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool}; - } - - Expression LogicalFUnordered(Operation operation) { - // Emulate SPIR-V's OpUnordered - const Id op_a = AsFloat(Visit(operation[0])); - const Id op_b = AsFloat(Visit(operation[1])); - const Id is_nan_a = OpIsNan(t_bool, op_a); - const Id is_nan_b = OpIsNan(t_bool, op_b); - return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool}; - } - - Id GetTextureSampler(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - ASSERT(!meta.sampler.is_buffer); - - const auto& entry = sampled_images.at(meta.sampler.index); - Id sampler = entry.variable; - if (meta.sampler.is_indexed) { - const Id index = AsInt(Visit(meta.index)); - sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); - } - return OpLoad(entry.sampler_type, sampler); - } - - Id GetTextureImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 index = meta.sampler.index; - if (meta.sampler.is_buffer) { - const auto& entry = uniform_texels.at(index); - return OpLoad(entry.image_type, entry.image); - } else { - const auto& entry = sampled_images.at(index); - return OpImage(entry.image_type, GetTextureSampler(operation)); - } - } - - Id GetImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const auto entry = images.at(meta.image.index); - return OpLoad(entry.image_type, entry.image); - } - - Id AssembleVector(const std::vector& coords, Type type) { - const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1); - return coords.size() == 1 ? coords[0] : OpCompositeConstruct(coords_type, coords); - } - - Id GetCoordinates(Operation operation, Type type) { - std::vector coords; - for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) { - coords.push_back(As(Visit(operation[i]), type)); - } - if (const auto meta = std::get_if(&operation.GetMeta())) { - // Add array coordinate for textures - if (meta->sampler.is_array) { - Id array = AsInt(Visit(meta->array)); - if (type == Type::Float) { - array = OpConvertSToF(t_float, array); - } - coords.push_back(array); - } - } - return AssembleVector(coords, type); - } - - Id GetOffsetCoordinates(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - std::vector coords; - coords.reserve(meta.aoffi.size()); - for (const auto& coord : meta.aoffi) { - coords.push_back(AsInt(Visit(coord))); - } - return AssembleVector(coords, Type::Int); - } - - std::pair GetDerivatives(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const auto& derivatives = meta.derivates; - ASSERT(derivatives.size() % 2 == 0); - - const std::size_t components = derivatives.size() / 2; - std::vector dx, dy; - dx.reserve(components); - dy.reserve(components); - for (std::size_t index = 0; index < components; ++index) { - dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0)))); - dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1)))); - } - return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)}; - } - - Expression GetTextureElement(Operation operation, Id sample_value, Type type) { - const auto& meta = std::get(operation.GetMeta()); - const auto type_def = GetTypeDefinition(type); - return {OpCompositeExtract(type_def, sample_value, meta.element), type}; - } - - Expression Texture(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const bool can_implicit = stage == ShaderType::Fragment; - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - - std::vector operands; - spv::ImageOperandsMask mask{}; - if (meta.bias) { - mask = mask | spv::ImageOperandsMask::Bias; - operands.push_back(AsFloat(Visit(meta.bias))); - } - - if (!can_implicit) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(v_float_zero); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.depth_compare) { - // Depth sampling - UNIMPLEMENTED_IF(meta.bias); - const Id dref = AsFloat(Visit(meta.depth_compare)); - if (can_implicit) { - return { - OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } else { - return { - OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - } - - Id texture; - if (can_implicit) { - texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); - } else { - texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const Id lod = AsFloat(Visit(meta.lod)); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; - std::vector operands{lod}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - const Id dref = AsFloat(Visit(meta.depth_compare)); - return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), - Type::Float}; - } - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const Id coords = GetCoordinates(operation, Type::Float); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; - std::vector operands; - Id texture{}; - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - if (meta.sampler.is_shadow) { - texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, - AsFloat(Visit(meta.depth_compare)), mask, operands); - } else { - u32 component_value = 0; - if (meta.component) { - const auto component = std::get_if(&*meta.component); - ASSERT_MSG(component, "Component is not an immediate value"); - component_value = component->GetValue(); - } - texture = OpImageGather(t_float4, GetTextureSampler(operation), coords, - Constant(t_uint, component_value), mask, operands); - } - return GetTextureElement(operation, texture, Type::Float); - } - - Expression TextureQueryDimensions(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const auto image_id = GetTextureImage(operation); - if (meta.element == 3) { - return {OpImageQueryLevels(t_int, image_id), Type::Int}; - } - - const Id lod = AsUint(Visit(operation[0])); - const std::size_t coords_count = [&meta] { - switch (const auto type = meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return 1; - case Tegra::Shader::TextureType::Texture2D: - case Tegra::Shader::TextureType::TextureCube: - return 2; - case Tegra::Shader::TextureType::Texture3D: - return 3; - default: - UNREACHABLE_MSG("Invalid texture type={}", type); - return 2; - } - }(); - - if (meta.element >= coords_count) { - return {v_float_zero, Type::Float}; - } - - const std::array types = {t_int, t_int2, t_int3}; - const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod); - const Id size = OpCompositeExtract(t_int, sizes, meta.element); - return {size, Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - UNIMPLEMENTED_IF(meta.depth_compare); - - if (meta.element >= 2) { - UNREACHABLE_MSG("Invalid element"); - return {v_float_zero, Type::Float}; - } - const auto sampler_id = GetTextureSampler(operation); - - const Id multiplier = Constant(t_float, 256.0f); - const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier); - - const Id coords = GetCoordinates(operation, Type::Float); - Id size = OpImageQueryLod(t_float2, sampler_id, coords); - size = OpFMul(t_float2, size, multipliers); - size = OpConvertFToS(t_int2, size); - return GetTextureElement(operation, size, Type::Int); - } - - Expression TexelFetch(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.depth_compare); - - const Id image = GetTextureImage(operation); - const Id coords = GetCoordinates(operation, Type::Int); - - spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; - std::vector operands; - Id fetch; - - if (meta.lod && !meta.sampler.is_buffer) { - mask = mask | spv::ImageOperandsMask::Lod; - operands.push_back(AsInt(Visit(meta.lod))); - } - - if (!meta.aoffi.empty()) { - mask = mask | spv::ImageOperandsMask::Offset; - operands.push_back(GetOffsetCoordinates(operation)); - } - - fetch = OpImageFetch(t_float4, image, coords, mask, operands); - return GetTextureElement(operation, fetch, Type::Float); - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(!meta.aoffi.empty()); - - const Id sampler = GetTextureSampler(operation); - const Id coords = GetCoordinates(operation, Type::Float); - const auto [dx, dy] = GetDerivatives(operation); - const std::vector grad = {dx, dy}; - - static constexpr auto mask = spv::ImageOperandsMask::Grad; - const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad); - return GetTextureElement(operation, texture, Type::Float); - } - - Expression ImageLoad(Operation operation) { - if (!device.IsFormatlessImageLoadSupported()) { - return {v_float_zero, Type::Float}; - } - - const auto& meta{std::get(operation.GetMeta())}; - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); - - return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto meta{std::get(operation.GetMeta())}; - std::vector colors; - for (const auto& value : meta.values) { - colors.push_back(AsUint(Visit(value))); - } - - const Id coords = GetCoordinates(operation, Type::Int); - const Id texel = OpCompositeConstruct(t_uint4, colors); - - OpImageWrite(GetImage(operation), coords, texel, {}); - return {}; - } - - template - Expression AtomicImage(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - const Id coordinate = GetCoordinates(operation, Type::Int); - const Id image = images.at(meta.image.index).image; - const Id sample = v_uint_zero; - const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample); - - const Id scope = Constant(t_uint, static_cast(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(meta.values[0])); - return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; - } - - template - Expression Atomic(Operation operation) { - Id pointer; - if (const auto smem = std::get_if(&*operation[0])) { - pointer = GetSharedMemoryPointer(*smem); - } else if (const auto gmem = std::get_if(&*operation[0])) { - pointer = GetGlobalMemoryPointer(*gmem); - } else { - UNREACHABLE(); - return {v_float_zero, Type::Float}; - } - const Id scope = Constant(t_uint, static_cast(spv::Scope::Device)); - const Id semantics = v_uint_zero; - const Id value = AsUint(Visit(operation[1])); - - return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; - } - - template - Expression Reduce(Operation operation) { - Atomic(operation); - return {}; - } - - Expression Branch(Operation operation) { - const auto& target = std::get(*operation[0]); - OpStore(jmp_to, Constant(t_uint, target.GetValue())); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression BranchIndirect(Operation operation) { - const Id op_a = AsUint(Visit(operation[0])); - - OpStore(jmp_to, op_a); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto& target = std::get(*operation[0]); - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, current); - - OpStore(access, Constant(t_uint, target.GetValue())); - OpStore(flow_stack_top, next); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); - const Id current = OpLoad(t_uint, flow_stack_top); - const Id previous = OpISub(t_uint, current, Constant(t_uint, 1)); - const Id access = OpAccessChain(t_func_uint, flow_stack, previous); - const Id target = OpLoad(t_uint, access); - - OpStore(flow_stack_top, previous); - OpStore(jmp_to, target); - OpBranch(continue_label); - inside_branch = true; - if (!conditional_branch_set) { - AddLabel(); - } - return {}; - } - - Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) { - using Compare = Maxwell::ComparisonOp; - switch (compare_op) { - case Compare::NeverOld: - return v_false; // Never let the test pass - case Compare::LessOld: - return OpFOrdLessThan(t_bool, operand_1, operand_2); - case Compare::EqualOld: - return OpFOrdEqual(t_bool, operand_1, operand_2); - case Compare::LessEqualOld: - return OpFOrdLessThanEqual(t_bool, operand_1, operand_2); - case Compare::GreaterOld: - return OpFOrdGreaterThan(t_bool, operand_1, operand_2); - case Compare::NotEqualOld: - return OpFOrdNotEqual(t_bool, operand_1, operand_2); - case Compare::GreaterEqualOld: - return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); - default: - UNREACHABLE(); - return v_true; - } - } - - void AlphaTest(Id pointer) { - if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) { - return; - } - const Id true_label = OpLabel(); - const Id discard_label = OpLabel(); - const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); - const Id alpha_value = OpLoad(t_float, pointer); - const Id condition = - MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference); - - OpBranchConditional(condition, true_label, discard_label); - AddLabel(discard_label); - OpKill(); - AddLabel(true_label); - } - - void PreExit() { - if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { - const u32 position_index = out_indices.position.value(); - const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); - const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); - Id depth = OpLoad(t_float, z_pointer); - depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer)); - depth = OpFMul(t_float, depth, Constant(t_float, 0.5f)); - OpStore(z_pointer, depth); - } - if (stage == ShaderType::Fragment) { - const auto SafeGetRegister = [this](u32 reg) { - if (const auto it = registers.find(reg); it != registers.end()) { - return OpLoad(t_float, it->second); - } - return v_float_zero; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, - "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. - u32 current_reg = 0; - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); - OpStore(pointer, SafeGetRegister(current_reg)); - if (rt == 0 && component == 3) { - AlphaTest(pointer); - } - ++current_reg; - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and - // current_reg already contains one past the last color register. - OpStore(frag_depth, SafeGetRegister(current_reg + 1)); - } - } - } - - Expression Exit(Operation operation) { - PreExit(); - inside_branch = true; - if (conditional_branch_set) { - OpReturn(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpReturn(); - AddLabel(); - } - return {}; - } - - Expression Discard(Operation operation) { - inside_branch = true; - if (conditional_branch_set) { - OpKill(); - } else { - const Id dummy = OpLabel(); - OpBranch(dummy); - AddLabel(dummy); - OpKill(); - AddLabel(); - } - return {}; - } - - Expression EmitVertex(Operation) { - OpEmitVertex(); - return {}; - } - - Expression EndPrimitive(Operation operation) { - OpEndPrimitive(); - return {}; - } - - Expression InvocationId(Operation) { - return {OpLoad(t_int, invocation_id), Type::Int}; - } - - Expression YNegate(Operation) { - LOG_WARNING(Render_Vulkan, "(STUBBED)"); - return {Constant(t_float, 1.0f), Type::Float}; - } - - template - Expression LocalInvocationId(Operation) { - const Id id = OpLoad(t_uint3, local_invocation_id); - return {OpCompositeExtract(t_uint, id, element), Type::Uint}; - } - - template - Expression WorkGroupId(Operation operation) { - const Id id = OpLoad(t_uint3, workgroup_id); - return {OpCompositeExtract(t_uint, id, element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const Id predicate = AsBool(Visit(operation[0])); - const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate); - - if (!device.IsWarpSizePotentiallyBiggerThanGuest()) { - // Guest-like devices can just return the first index. - return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint}; - } - - // The others will have to return what is local to the current thread. - // For instance a device with a warp size of 64 will return the upper uint when the current - // thread is 38. - const Id tid = OpLoad(t_uint, thread_id); - const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5)); - return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint}; - } - - template - Expression Vote(Operation operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id predicate = AsBool(Visit(operation[0])); - return {(this->*func)(t_bool, predicate), Type::Bool}; - } - - Expression ThreadId(Operation) { - return {OpLoad(t_uint, thread_id), Type::Uint}; - } - - template - Expression ThreadMask(Operation) { - // TODO(Rodrigo): Handle devices with different warp sizes - const Id mask = thread_masks[index]; - return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - const Id value = AsFloat(Visit(operation[0])); - const Id index = AsUint(Visit(operation[1])); - return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); - return {}; - } - - const auto scope = spv::Scope::Workgroup; - const auto memory = spv::Scope::Workgroup; - const auto semantics = - spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease; - OpControlBarrier(Constant(t_uint, static_cast(scope)), - Constant(t_uint, static_cast(memory)), - Constant(t_uint, static_cast(semantics))); - return {}; - } - - template - Expression MemoryBarrier(Operation) { - const auto semantics = - spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | - spv::MemorySemanticsMask::WorkgroupMemory | - spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory; - - OpMemoryBarrier(Constant(t_uint, static_cast(scope)), - Constant(t_uint, static_cast(semantics))); - return {}; - } - - Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) { - const Id id = OpVariable(type, storage); - Decorate(id, spv::Decoration::BuiltIn, static_cast(builtin)); - AddGlobalVariable(Name(id, std::move(name))); - interfaces.push_back(id); - return id; - } - - Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) { - return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); - } - - template - Id AccessElement(Id pointer_type, Id composite, Args... elements_) { - std::vector members; - auto elements = {elements_...}; - for (const auto element : elements) { - members.push_back(Constant(t_uint, element)); - } - - return OpAccessChain(pointer_type, composite, members); - } - - Id As(Expression expr, Type wanted_type) { - switch (wanted_type) { - case Type::Bool: - return AsBool(expr); - case Type::Bool2: - return AsBool2(expr); - case Type::Float: - return AsFloat(expr); - case Type::Int: - return AsInt(expr); - case Type::Uint: - return AsUint(expr); - case Type::HalfFloat: - return AsHalfFloat(expr); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsBool(Expression expr) { - ASSERT(expr.type == Type::Bool); - return expr.id; - } - - Id AsBool2(Expression expr) { - ASSERT(expr.type == Type::Bool2); - return expr.id; - } - - Id AsFloat(Expression expr) { - switch (expr.type) { - case Type::Float: - return expr.id; - case Type::Int: - case Type::Uint: - return OpBitcast(t_float, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_float, expr.id); - } - return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsInt(Expression expr) { - switch (expr.type) { - case Type::Int: - return expr.id; - case Type::Float: - case Type::Uint: - return OpBitcast(t_int, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_int, expr.id); - } - return OpPackHalf2x16(t_int, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsUint(Expression expr) { - switch (expr.type) { - case Type::Uint: - return expr.id; - case Type::Float: - case Type::Int: - return OpBitcast(t_uint, expr.id); - case Type::HalfFloat: - if (device.IsFloat16Supported()) { - return OpBitcast(t_uint, expr.id); - } - return OpPackHalf2x16(t_uint, expr.id); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id AsHalfFloat(Expression expr) { - switch (expr.type) { - case Type::HalfFloat: - return expr.id; - case Type::Float: - case Type::Int: - case Type::Uint: - if (device.IsFloat16Supported()) { - return OpBitcast(t_half, expr.id); - } - return OpUnpackHalf2x16(t_half, AsUint(expr)); - default: - UNREACHABLE(); - return expr.id; - } - } - - Id GetHalfScalarFromFloat(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_scalar_half, value); - } - return value; - } - - Id GetFloatFromHalfScalar(Id value) { - if (device.IsFloat16Supported()) { - return OpFConvert(t_float, value); - } - return value; - } - - AttributeType GetAttributeType(u32 location) const { - if (stage != ShaderType::Vertex) { - return {Type::Float, t_in_float, t_in_float4}; - } - switch (specialization.attribute_types.at(location)) { - case Maxwell::VertexAttribute::Type::SignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - case Maxwell::VertexAttribute::Type::Float: - return {Type::Float, t_in_float, t_in_float4}; - case Maxwell::VertexAttribute::Type::SignedInt: - return {Type::Int, t_in_int, t_in_int4}; - case Maxwell::VertexAttribute::Type::UnsignedInt: - return {Type::Uint, t_in_uint, t_in_uint4}; - default: - UNREACHABLE(); - return {Type::Float, t_in_float, t_in_float4}; - } - } - - Id GetTypeDefinition(Type type) const { - switch (type) { - case Type::Bool: - return t_bool; - case Type::Bool2: - return t_bool2; - case Type::Float: - return t_float; - case Type::Int: - return t_int; - case Type::Uint: - return t_uint; - case Type::HalfFloat: - return t_half; - default: - UNREACHABLE(); - return {}; - } - } - - std::array GetTypeVectorDefinitionLut(Type type) const { - switch (type) { - case Type::Float: - return {t_float, t_float2, t_float3, t_float4}; - case Type::Int: - return {t_int, t_int2, t_int3, t_int4}; - case Type::Uint: - return {t_uint, t_uint2, t_uint3, t_uint4}; - default: - UNIMPLEMENTED(); - return {}; - } - } - - std::tuple CreateFlowStack() { - // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely - // that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - constexpr auto storage_class = spv::StorageClass::Function; - - const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); - const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, - ConstantNull(flow_stack_type)); - const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0)); - AddLocalVariable(stack); - AddLocalVariable(top); - return std::tie(stack, top); - } - - std::pair GetFlowStack(Operation operation) { - const auto stack_class = std::get(operation.GetMeta()); - switch (stack_class) { - case MetaStackClass::Ssy: - return {ssy_flow_stack, ssy_flow_stack_top}; - case MetaStackClass::Pbk: - return {pbk_flow_stack, pbk_flow_stack_top}; - } - UNREACHABLE(); - return {}; - } - - Id GetGlobalMemoryPointer(const GmemNode& gmem) { - const Id real = AsUint(Visit(gmem.GetRealAddress())); - const Id base = AsUint(Visit(gmem.GetBaseAddress())); - const Id diff = OpISub(t_uint, real, base); - const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); - const Id buffer = global_buffers.at(gmem.GetDescriptor()); - return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); - } - - Id GetSharedMemoryPointer(const SmemNode& smem) { - ASSERT(stage == ShaderType::Compute); - Id address = AsUint(Visit(smem.GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - return OpAccessChain(t_smem_uint, shared_memory, address); - } - - static constexpr std::array operation_decompilers = { - &SPIRVDecompiler::Assign, - - &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, - Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, - &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, - &SPIRVDecompiler::FCastHalf<0>, - &SPIRVDecompiler::FCastHalf<1>, - &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, - &SPIRVDecompiler::FSwizzleAdd, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>, - - &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>, - &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>, - &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>, - &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>, - &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>, - - &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>, - &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, - &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, - &SPIRVDecompiler::HNegate, - &SPIRVDecompiler::HClamp, - &SPIRVDecompiler::HCastFloat, - &SPIRVDecompiler::HUnpack, - &SPIRVDecompiler::HMergeF32, - &SPIRVDecompiler::HMergeHN<0>, - &SPIRVDecompiler::HMergeHN<1>, - &SPIRVDecompiler::HPack2, - - &SPIRVDecompiler::LogicalAssign, - &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, - &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, - &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2, - Type::Uint>, - &SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>, - - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::LogicalFOrdered, - &SPIRVDecompiler::LogicalFUnordered, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>, - &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>, - - &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>, - &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>, - - &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, - &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, - - &SPIRVDecompiler::LogicalAddCarry, - - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - // TODO(Rodrigo): Should these use the OpFUnord* variants? - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, - &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, - - &SPIRVDecompiler::Texture, - &SPIRVDecompiler::TextureLod, - &SPIRVDecompiler::TextureGather, - &SPIRVDecompiler::TextureQueryDimensions, - &SPIRVDecompiler::TextureQueryLod, - &SPIRVDecompiler::TexelFetch, - &SPIRVDecompiler::TextureGradient, - - &SPIRVDecompiler::ImageLoad, - &SPIRVDecompiler::ImageStore, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>, - &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, - &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, - - &SPIRVDecompiler::Branch, - &SPIRVDecompiler::BranchIndirect, - &SPIRVDecompiler::PushFlowStack, - &SPIRVDecompiler::PopFlowStack, - &SPIRVDecompiler::Exit, - &SPIRVDecompiler::Discard, - - &SPIRVDecompiler::EmitVertex, - &SPIRVDecompiler::EndPrimitive, - - &SPIRVDecompiler::InvocationId, - &SPIRVDecompiler::YNegate, - &SPIRVDecompiler::LocalInvocationId<0>, - &SPIRVDecompiler::LocalInvocationId<1>, - &SPIRVDecompiler::LocalInvocationId<2>, - &SPIRVDecompiler::WorkGroupId<0>, - &SPIRVDecompiler::WorkGroupId<1>, - &SPIRVDecompiler::WorkGroupId<2>, - - &SPIRVDecompiler::BallotThread, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>, - &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, - - &SPIRVDecompiler::ThreadId, - &SPIRVDecompiler::ThreadMask<0>, // Eq - &SPIRVDecompiler::ThreadMask<1>, // Ge - &SPIRVDecompiler::ThreadMask<2>, // Gt - &SPIRVDecompiler::ThreadMask<3>, // Le - &SPIRVDecompiler::ThreadMask<4>, // Lt - &SPIRVDecompiler::ShuffleIndexed, - - &SPIRVDecompiler::Barrier, - &SPIRVDecompiler::MemoryBarrier, - &SPIRVDecompiler::MemoryBarrier, - }; - static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); - - const Device& device; - const ShaderIR& ir; - const ShaderType stage; - const Tegra::Shader::Header header; - const Registry& registry; - const Specialization& specialization; - std::unordered_map transform_feedback; - - const Id t_void = Name(TypeVoid(), "void"); - - const Id t_bool = Name(TypeBool(), "bool"); - const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2"); - - const Id t_int = Name(TypeInt(32, true), "int"); - const Id t_int2 = Name(TypeVector(t_int, 2), "int2"); - const Id t_int3 = Name(TypeVector(t_int, 3), "int3"); - const Id t_int4 = Name(TypeVector(t_int, 4), "int4"); - - const Id t_uint = Name(TypeInt(32, false), "uint"); - const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2"); - const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3"); - const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4"); - - const Id t_float = Name(TypeFloat(32), "float"); - const Id t_float2 = Name(TypeVector(t_float, 2), "float2"); - const Id t_float3 = Name(TypeVector(t_float, 3), "float3"); - const Id t_float4 = Name(TypeVector(t_float, 4), "float4"); - - const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool"); - const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float"); - - const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint"); - - const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool"); - const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int"); - const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4"); - const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint"); - const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3"); - const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4"); - const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float"); - const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2"); - const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3"); - const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4"); - - const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int"); - - const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float"); - const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); - - const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); - const Id t_cbuf_std140 = Decorate( - Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"), - spv::Decoration::ArrayStride, 16U); - const Id t_cbuf_scalar = Decorate( - Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"), - spv::Decoration::ArrayStride, 4U); - const Id t_cbuf_std140_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_scalar_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); - const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); - - Id t_smem_uint{}; - - const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); - const Id t_gmem_array = - Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); - const Id t_gmem_struct = MemberDecorate( - Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); - - const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint); - - const Id v_float_zero = Constant(t_float, 0.0f); - const Id v_float_one = Constant(t_float, 1.0f); - const Id v_uint_zero = Constant(t_uint, 0); - - // Nvidia uses these defaults for varyings (e.g. position and generic attributes) - const Id v_varying_default = - ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one); - - const Id v_true = ConstantTrue(t_bool); - const Id v_false = ConstantFalse(t_bool); - - Id t_scalar_half{}; - Id t_half{}; - - Id out_vertex{}; - Id in_vertex{}; - std::map registers; - std::map custom_variables; - std::map predicates; - std::map flow_variables; - Id local_memory{}; - Id shared_memory{}; - std::array internal_flags{}; - std::map input_attributes; - std::unordered_map output_attributes; - std::map constant_buffers; - std::map global_buffers; - std::map uniform_texels; - std::map sampled_images; - std::map images; - - std::array frag_colors{}; - Id instance_index{}; - Id vertex_index{}; - Id base_instance{}; - Id base_vertex{}; - Id frag_depth{}; - Id frag_coord{}; - Id front_facing{}; - Id point_coord{}; - Id tess_level_outer{}; - Id tess_level_inner{}; - Id tess_coord{}; - Id invocation_id{}; - Id workgroup_id{}; - Id local_invocation_id{}; - Id thread_id{}; - std::array thread_masks{}; // eq, ge, gt, le, lt - - VertexIndices in_indices; - VertexIndices out_indices; - - std::vector interfaces; - - Id jmp_to{}; - Id ssy_flow_stack_top{}; - Id pbk_flow_stack_top{}; - Id ssy_flow_stack{}; - Id pbk_flow_stack{}; - Id continue_label{}; - std::map labels; - - bool conditional_branch_set{}; - bool inside_branch{}; -}; - -class ExprDecompiler { -public: - explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - Id operator()(const ExprAnd& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalAnd(type_def, op1, op2); - } - - Id operator()(const ExprOr& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - const Id op2 = Visit(expr.operand2); - return decomp.OpLogicalOr(type_def, op1, op2); - } - - Id operator()(const ExprNot& expr) { - const Id type_def = decomp.GetTypeDefinition(Type::Bool); - const Id op1 = Visit(expr.operand1); - return decomp.OpLogicalNot(type_def, op1); - } - - Id operator()(const ExprPredicate& expr) { - const auto pred = static_cast(expr.predicate); - return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)); - } - - Id operator()(const ExprCondCode& expr) { - return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc))); - } - - Id operator()(const ExprVar& expr) { - return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)); - } - - Id operator()(const ExprBoolean& expr) { - return expr.value ? decomp.v_true : decomp.v_false; - } - - Id operator()(const ExprGprEqual& expr) { - const Id target = decomp.Constant(decomp.t_uint, expr.value); - Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); - gpr = decomp.OpBitcast(decomp.t_uint, gpr); - return decomp.OpIEqual(decomp.t_bool, gpr, target); - } - - Id Visit(const Expr& node) { - return std::visit(*this, *node); - } - -private: - SPIRVDecompiler& decomp; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(endif_label); - decomp.AddLabel(endif_label); - } - - void operator()([[maybe_unused]] const ASTIfElse& ast) { - UNREACHABLE(); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBasicBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpStore(decomp.flow_variables.at(ast.index), condition); - } - - void operator()([[maybe_unused]] const ASTLabel& ast) { - // Do nothing - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - const Id loop_label = decomp.OpLabel(); - const Id endloop_label = decomp.OpLabel(); - const Id loop_start_block = decomp.OpLabel(); - const Id loop_continue_block = decomp.OpLabel(); - current_loop_exit = endloop_label; - decomp.OpBranch(loop_label); - decomp.AddLabel(loop_label); - decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone); - decomp.OpBranch(loop_start_block); - decomp.AddLabel(loop_start_block); - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.OpBranch(loop_continue_block); - decomp.AddLabel(loop_continue_block); - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - decomp.OpBranchConditional(condition, loop_label, endloop_label); - decomp.AddLabel(endloop_label); - } - - void operator()(const ASTReturn& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - if (ast.kills) { - decomp.OpKill(); - } else { - decomp.PreExit(); - decomp.OpReturn(); - } - decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - if (ast.kills) { - decomp.OpKill(); - } else { - decomp.PreExit(); - decomp.OpReturn(); - } - decomp.AddLabel(decomp.OpLabel()); - } - } - - void operator()(const ASTBreak& ast) { - if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { - ExprDecompiler expr_parser{decomp}; - const Id condition = expr_parser.Visit(ast.condition); - const Id then_label = decomp.OpLabel(); - const Id endif_label = decomp.OpLabel(); - decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); - decomp.OpBranchConditional(condition, then_label, endif_label); - decomp.AddLabel(then_label); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(endif_label); - } else { - const Id next_block = decomp.OpLabel(); - decomp.OpBranch(next_block); - decomp.AddLabel(next_block); - decomp.OpBranch(current_loop_exit); - decomp.AddLabel(decomp.OpLabel()); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - SPIRVDecompiler& decomp; - Id current_loop_exit{}; -}; - -void SPIRVDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); - Name(id, fmt::format("flow_var_{}", i)); - flow_variables.emplace(i, AddGlobalVariable(id)); - } - - DefinePrologue(); - - const ASTNode program = ir.GetASTProgram(); - ASTDecompiler decompiler{*this}; - decompiler.Visit(program); - - const Id next_block = OpLabel(); - OpBranch(next_block); - AddLabel(next_block); -} - -} // Anonymous namespace - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second, cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_buffers.emplace_back(GlobalBufferEntry{ - .cbuf_index = base.cbuf_index, - .cbuf_offset = base.cbuf_offset, - .is_written = usage.is_written, - }); - } - for (const auto& sampler : ir.GetSamplers()) { - if (sampler.is_buffer) { - entries.uniform_texels.emplace_back(sampler); - } else { - entries.samplers.emplace_back(sampler); - } - } - for (const auto& image : ir.GetImages()) { - if (image.type == Tegra::Shader::ImageType::TextureBuffer) { - entries.storage_texels.emplace_back(image); - } else { - entries.images.emplace_back(image); - } - } - for (const auto& attribute : ir.GetInputAttributes()) { - if (IsGenericAttribute(attribute)) { - entries.attributes.insert(GetGenericAttributeLocation(attribute)); - } - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.clip_distances = ir.GetClipDistances(); - entries.shader_length = ir.GetLength(); - entries.uses_warps = ir.UsesWarps(); - return entries; -} - -std::vector Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - ShaderType stage, const VideoCommon::Shader::Registry& registry, - const Specialization& specialization) { - return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h deleted file mode 100644 index 5d94132a5..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace Vulkan { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using StorageTexelEntry = VideoCommon::Shader::ImageEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -constexpr u32 DESCRIPTOR_SET = 0; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_) - : ConstBuffer{entry_}, index{index_} {} - - constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -struct GlobalBufferEntry { - u32 cbuf_index{}; - u32 cbuf_offset{}; - bool is_written{}; -}; - -struct ShaderEntries { - u32 NumBindings() const { - return static_cast(const_buffers.size() + global_buffers.size() + - uniform_texels.size() + samplers.size() + storage_texels.size() + - images.size()); - } - - std::vector const_buffers; - std::vector global_buffers; - std::vector uniform_texels; - std::vector samplers; - std::vector storage_texels; - std::vector images; - std::set attributes; - std::array clip_distances{}; - std::size_t shader_length{}; - u32 enabled_uniform_buffers{}; - bool uses_warps{}; -}; - -struct Specialization final { - u32 base_binding{}; - - // Compute specific - std::array workgroup_size{}; - u32 shared_memory_size{}; - - // Graphics specific - std::optional point_size; - std::bitset enabled_attributes; - std::array attribute_types{}; - bool ndc_minus_one_to_one{}; - bool early_fragment_tests{}; - float alpha_test_ref{}; - Maxwell::ComparisonOp alpha_test_func{}; -}; -// Old gcc versions don't consider this trivially copyable. -// static_assert(std::is_trivially_copyable_v); - -struct SPIRVShader { - std::vector code; - ShaderEntries entries; -}; - -ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); - -std::vector Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage, - const VideoCommon::Shader::Registry& registry, - const Specialization& specialization); - -} // namespace Vulkan -- cgit v1.2.3 From c67d64365a712830fe140dd36e24e2efd9b8a812 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 20:52:12 -0300 Subject: shader: Remove old shader management --- CMakeModules/GenerateSCMRev.cmake | 56 +- externals/Vulkan-Headers | 2 +- externals/sirit | 2 +- src/common/CMakeLists.txt | 56 +- src/video_core/CMakeLists.txt | 64 - .../engines/const_buffer_engine_interface.h | 103 - src/video_core/engines/kepler_compute.cpp | 44 +- src/video_core/engines/kepler_compute.h | 20 +- src/video_core/engines/maxwell_3d.cpp | 38 - src/video_core/engines/maxwell_3d.h | 20 +- src/video_core/guest_driver.cpp | 37 - src/video_core/guest_driver.h | 46 - src/video_core/rasterizer_interface.h | 16 +- .../renderer_opengl/gl_arb_decompiler.cpp | 2124 -------------- src/video_core/renderer_opengl/gl_arb_decompiler.h | 29 - src/video_core/renderer_opengl/gl_rasterizer.cpp | 314 +- src/video_core/renderer_opengl/gl_rasterizer.h | 33 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 564 +--- src/video_core/renderer_opengl/gl_shader_cache.h | 102 +- .../renderer_opengl/gl_shader_decompiler.cpp | 2986 -------------------- .../renderer_opengl/gl_shader_decompiler.h | 69 - .../renderer_opengl/gl_shader_disk_cache.cpp | 482 ---- .../renderer_opengl/gl_shader_disk_cache.h | 176 -- src/video_core/renderer_vulkan/blit_image.cpp | 1 - .../renderer_vulkan/vk_compute_pipeline.cpp | 136 +- .../renderer_vulkan/vk_compute_pipeline.h | 47 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 484 ---- .../renderer_vulkan/vk_graphics_pipeline.h | 103 - .../renderer_vulkan/vk_pipeline_cache.cpp | 375 +-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 91 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 361 +-- src/video_core/renderer_vulkan/vk_rasterizer.h | 47 +- src/video_core/shader/ast.cpp | 752 ----- src/video_core/shader/ast.h | 398 --- src/video_core/shader/async_shaders.cpp | 234 -- src/video_core/shader/async_shaders.h | 138 - src/video_core/shader/compiler_settings.cpp | 26 - src/video_core/shader/compiler_settings.h | 26 - src/video_core/shader/control_flow.cpp | 751 ----- src/video_core/shader/control_flow.h | 117 - src/video_core/shader/decode.cpp | 368 --- src/video_core/shader/decode/arithmetic.cpp | 166 -- src/video_core/shader/decode/arithmetic_half.cpp | 101 - .../shader/decode/arithmetic_half_immediate.cpp | 54 - .../shader/decode/arithmetic_immediate.cpp | 53 - .../shader/decode/arithmetic_integer.cpp | 375 --- .../shader/decode/arithmetic_integer_immediate.cpp | 99 - src/video_core/shader/decode/bfe.cpp | 77 - src/video_core/shader/decode/bfi.cpp | 45 - src/video_core/shader/decode/conversion.cpp | 321 --- src/video_core/shader/decode/ffma.cpp | 62 - src/video_core/shader/decode/float_set.cpp | 58 - .../shader/decode/float_set_predicate.cpp | 57 - src/video_core/shader/decode/half_set.cpp | 115 - .../shader/decode/half_set_predicate.cpp | 80 - src/video_core/shader/decode/hfma2.cpp | 73 - src/video_core/shader/decode/image.cpp | 536 ---- src/video_core/shader/decode/integer_set.cpp | 49 - .../shader/decode/integer_set_predicate.cpp | 53 - src/video_core/shader/decode/memory.cpp | 493 ---- src/video_core/shader/decode/other.cpp | 322 --- .../shader/decode/predicate_set_predicate.cpp | 68 - .../shader/decode/predicate_set_register.cpp | 46 - .../shader/decode/register_set_predicate.cpp | 86 - src/video_core/shader/decode/shift.cpp | 153 - src/video_core/shader/decode/texture.cpp | 935 ------ src/video_core/shader/decode/video.cpp | 169 -- src/video_core/shader/decode/warp.cpp | 117 - src/video_core/shader/decode/xmad.cpp | 156 - src/video_core/shader/expr.cpp | 93 - src/video_core/shader/expr.h | 156 - src/video_core/shader/memory_util.cpp | 76 - src/video_core/shader/memory_util.h | 43 - src/video_core/shader/node.h | 701 ----- src/video_core/shader/node_helper.cpp | 115 - src/video_core/shader/node_helper.h | 71 - src/video_core/shader/registry.cpp | 181 -- src/video_core/shader/registry.h | 172 -- src/video_core/shader/shader_ir.cpp | 464 --- src/video_core/shader/shader_ir.h | 479 ---- src/video_core/shader/track.cpp | 236 -- src/video_core/shader/transform_feedback.cpp | 115 - src/video_core/shader/transform_feedback.h | 23 - 83 files changed, 57 insertions(+), 19625 deletions(-) delete mode 100644 src/video_core/engines/const_buffer_engine_interface.h delete mode 100644 src/video_core/guest_driver.cpp delete mode 100644 src/video_core/guest_driver.h delete mode 100644 src/video_core/renderer_opengl/gl_arb_decompiler.cpp delete mode 100644 src/video_core/renderer_opengl/gl_arb_decompiler.h delete mode 100644 src/video_core/renderer_opengl/gl_shader_decompiler.cpp delete mode 100644 src/video_core/renderer_opengl/gl_shader_decompiler.h delete mode 100644 src/video_core/renderer_opengl/gl_shader_disk_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_shader_disk_cache.h delete mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h delete mode 100644 src/video_core/shader/ast.cpp delete mode 100644 src/video_core/shader/ast.h delete mode 100644 src/video_core/shader/async_shaders.cpp delete mode 100644 src/video_core/shader/async_shaders.h delete mode 100644 src/video_core/shader/compiler_settings.cpp delete mode 100644 src/video_core/shader/compiler_settings.h delete mode 100644 src/video_core/shader/control_flow.cpp delete mode 100644 src/video_core/shader/control_flow.h delete mode 100644 src/video_core/shader/decode.cpp delete mode 100644 src/video_core/shader/decode/arithmetic.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_half.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_half_immediate.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_immediate.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_integer.cpp delete mode 100644 src/video_core/shader/decode/arithmetic_integer_immediate.cpp delete mode 100644 src/video_core/shader/decode/bfe.cpp delete mode 100644 src/video_core/shader/decode/bfi.cpp delete mode 100644 src/video_core/shader/decode/conversion.cpp delete mode 100644 src/video_core/shader/decode/ffma.cpp delete mode 100644 src/video_core/shader/decode/float_set.cpp delete mode 100644 src/video_core/shader/decode/float_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/half_set.cpp delete mode 100644 src/video_core/shader/decode/half_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/hfma2.cpp delete mode 100644 src/video_core/shader/decode/image.cpp delete mode 100644 src/video_core/shader/decode/integer_set.cpp delete mode 100644 src/video_core/shader/decode/integer_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/memory.cpp delete mode 100644 src/video_core/shader/decode/other.cpp delete mode 100644 src/video_core/shader/decode/predicate_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/predicate_set_register.cpp delete mode 100644 src/video_core/shader/decode/register_set_predicate.cpp delete mode 100644 src/video_core/shader/decode/shift.cpp delete mode 100644 src/video_core/shader/decode/texture.cpp delete mode 100644 src/video_core/shader/decode/video.cpp delete mode 100644 src/video_core/shader/decode/warp.cpp delete mode 100644 src/video_core/shader/decode/xmad.cpp delete mode 100644 src/video_core/shader/expr.cpp delete mode 100644 src/video_core/shader/expr.h delete mode 100644 src/video_core/shader/memory_util.cpp delete mode 100644 src/video_core/shader/memory_util.h delete mode 100644 src/video_core/shader/node.h delete mode 100644 src/video_core/shader/node_helper.cpp delete mode 100644 src/video_core/shader/node_helper.h delete mode 100644 src/video_core/shader/registry.cpp delete mode 100644 src/video_core/shader/registry.h delete mode 100644 src/video_core/shader/shader_ir.cpp delete mode 100644 src/video_core/shader/shader_ir.h delete mode 100644 src/video_core/shader/track.cpp delete mode 100644 src/video_core/shader/transform_feedback.cpp delete mode 100644 src/video_core/shader/transform_feedback.h (limited to 'src/video_core') diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 311ba1c2e..77358768e 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -51,61 +51,7 @@ endif() # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) set(VIDEO_CORE "${SRC_DIR}/src/video_core") set(HASH_FILES - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" - "${VIDEO_CORE}/shader/decode/arithmetic.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" - "${VIDEO_CORE}/shader/decode/bfe.cpp" - "${VIDEO_CORE}/shader/decode/bfi.cpp" - "${VIDEO_CORE}/shader/decode/conversion.cpp" - "${VIDEO_CORE}/shader/decode/ffma.cpp" - "${VIDEO_CORE}/shader/decode/float_set.cpp" - "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/half_set.cpp" - "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/hfma2.cpp" - "${VIDEO_CORE}/shader/decode/image.cpp" - "${VIDEO_CORE}/shader/decode/integer_set.cpp" - "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/memory.cpp" - "${VIDEO_CORE}/shader/decode/texture.cpp" - "${VIDEO_CORE}/shader/decode/other.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" - "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/shift.cpp" - "${VIDEO_CORE}/shader/decode/video.cpp" - "${VIDEO_CORE}/shader/decode/warp.cpp" - "${VIDEO_CORE}/shader/decode/xmad.cpp" - "${VIDEO_CORE}/shader/ast.cpp" - "${VIDEO_CORE}/shader/ast.h" - "${VIDEO_CORE}/shader/compiler_settings.cpp" - "${VIDEO_CORE}/shader/compiler_settings.h" - "${VIDEO_CORE}/shader/control_flow.cpp" - "${VIDEO_CORE}/shader/control_flow.h" - "${VIDEO_CORE}/shader/decode.cpp" - "${VIDEO_CORE}/shader/expr.cpp" - "${VIDEO_CORE}/shader/expr.h" - "${VIDEO_CORE}/shader/node.h" - "${VIDEO_CORE}/shader/node_helper.cpp" - "${VIDEO_CORE}/shader/node_helper.h" - "${VIDEO_CORE}/shader/registry.cpp" - "${VIDEO_CORE}/shader/registry.h" - "${VIDEO_CORE}/shader/shader_ir.cpp" - "${VIDEO_CORE}/shader/shader_ir.h" - "${VIDEO_CORE}/shader/track.cpp" - "${VIDEO_CORE}/shader/transform_feedback.cpp" - "${VIDEO_CORE}/shader/transform_feedback.h" + # ... ) set(COMBINED "") foreach (F IN LISTS HASH_FILES) diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers index 8188e3fbb..07c4a37bc 160000 --- a/externals/Vulkan-Headers +++ b/externals/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 8188e3fbbc105591064093440f88081fb957d4f0 +Subproject commit 07c4a37bcf41ea50aef6e98236abdfe8089fb4c6 diff --git a/externals/sirit b/externals/sirit index 200310e8f..a39596358 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit 200310e8faa756b9869dd6dfc902c255246ac74a +Subproject commit a39596358a3a5488c06554c0c15184a6af71e433 diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index e03fffd8d..c92266a17 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -32,61 +32,7 @@ add_custom_command(OUTPUT scm_rev.cpp DEPENDS # WARNING! It was too much work to try and make a common location for this list, # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" - "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" - "${VIDEO_CORE}/shader/decode/arithmetic.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp" - "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp" - "${VIDEO_CORE}/shader/decode/bfe.cpp" - "${VIDEO_CORE}/shader/decode/bfi.cpp" - "${VIDEO_CORE}/shader/decode/conversion.cpp" - "${VIDEO_CORE}/shader/decode/ffma.cpp" - "${VIDEO_CORE}/shader/decode/float_set.cpp" - "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/half_set.cpp" - "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/hfma2.cpp" - "${VIDEO_CORE}/shader/decode/image.cpp" - "${VIDEO_CORE}/shader/decode/integer_set.cpp" - "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/memory.cpp" - "${VIDEO_CORE}/shader/decode/texture.cpp" - "${VIDEO_CORE}/shader/decode/other.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" - "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" - "${VIDEO_CORE}/shader/decode/shift.cpp" - "${VIDEO_CORE}/shader/decode/video.cpp" - "${VIDEO_CORE}/shader/decode/warp.cpp" - "${VIDEO_CORE}/shader/decode/xmad.cpp" - "${VIDEO_CORE}/shader/ast.cpp" - "${VIDEO_CORE}/shader/ast.h" - "${VIDEO_CORE}/shader/compiler_settings.cpp" - "${VIDEO_CORE}/shader/compiler_settings.h" - "${VIDEO_CORE}/shader/control_flow.cpp" - "${VIDEO_CORE}/shader/control_flow.h" - "${VIDEO_CORE}/shader/decode.cpp" - "${VIDEO_CORE}/shader/expr.cpp" - "${VIDEO_CORE}/shader/expr.h" - "${VIDEO_CORE}/shader/node.h" - "${VIDEO_CORE}/shader/node_helper.cpp" - "${VIDEO_CORE}/shader/node_helper.h" - "${VIDEO_CORE}/shader/registry.cpp" - "${VIDEO_CORE}/shader/registry.h" - "${VIDEO_CORE}/shader/shader_ir.cpp" - "${VIDEO_CORE}/shader/shader_ir.h" - "${VIDEO_CORE}/shader/track.cpp" - "${VIDEO_CORE}/shader/transform_feedback.cpp" - "${VIDEO_CORE}/shader/transform_feedback.h" + # ... # and also check that the scm_rev files haven't changed "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e4de55f4d..c5ce71706 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,7 +29,6 @@ add_library(video_core STATIC dirty_flags.h dma_pusher.cpp dma_pusher.h - engines/const_buffer_engine_interface.h engines/const_buffer_info.h engines/engine_interface.h engines/engine_upload.cpp @@ -61,8 +60,6 @@ add_library(video_core STATIC gpu.h gpu_thread.cpp gpu_thread.h - guest_driver.cpp - guest_driver.h memory_manager.cpp memory_manager.h query_cache.h @@ -71,8 +68,6 @@ add_library(video_core STATIC rasterizer_interface.h renderer_base.cpp renderer_base.h - renderer_opengl/gl_arb_decompiler.cpp - renderer_opengl/gl_arb_decompiler.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_device.cpp @@ -85,10 +80,6 @@ add_library(video_core STATIC renderer_opengl/gl_resource_manager.h renderer_opengl/gl_shader_cache.cpp renderer_opengl/gl_shader_cache.h - renderer_opengl/gl_shader_decompiler.cpp - renderer_opengl/gl_shader_decompiler.h - renderer_opengl/gl_shader_disk_cache.cpp - renderer_opengl/gl_shader_disk_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp @@ -128,8 +119,6 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h - renderer_vulkan/vk_graphics_pipeline.cpp - renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp @@ -142,8 +131,6 @@ add_library(video_core STATIC renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp renderer_vulkan/vk_scheduler.h - renderer_vulkan/vk_shader_decompiler.cpp - renderer_vulkan/vk_shader_decompiler.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_staging_buffer_pool.cpp @@ -159,57 +146,6 @@ add_library(video_core STATIC shader_cache.h shader_notify.cpp shader_notify.h - shader/decode/arithmetic.cpp - shader/decode/arithmetic_immediate.cpp - shader/decode/bfe.cpp - shader/decode/bfi.cpp - shader/decode/shift.cpp - shader/decode/arithmetic_integer.cpp - shader/decode/arithmetic_integer_immediate.cpp - shader/decode/arithmetic_half.cpp - shader/decode/arithmetic_half_immediate.cpp - shader/decode/ffma.cpp - shader/decode/hfma2.cpp - shader/decode/conversion.cpp - shader/decode/memory.cpp - shader/decode/texture.cpp - shader/decode/image.cpp - shader/decode/float_set_predicate.cpp - shader/decode/integer_set_predicate.cpp - shader/decode/half_set_predicate.cpp - shader/decode/predicate_set_register.cpp - shader/decode/predicate_set_predicate.cpp - shader/decode/register_set_predicate.cpp - shader/decode/float_set.cpp - shader/decode/integer_set.cpp - shader/decode/half_set.cpp - shader/decode/video.cpp - shader/decode/warp.cpp - shader/decode/xmad.cpp - shader/decode/other.cpp - shader/ast.cpp - shader/ast.h - shader/async_shaders.cpp - shader/async_shaders.h - shader/compiler_settings.cpp - shader/compiler_settings.h - shader/control_flow.cpp - shader/control_flow.h - shader/decode.cpp - shader/expr.cpp - shader/expr.h - shader/memory_util.cpp - shader/memory_util.h - shader/node_helper.cpp - shader/node_helper.h - shader/node.h - shader/registry.cpp - shader/registry.h - shader/shader_ir.cpp - shader/shader_ir.h - shader/track.cpp - shader/transform_feedback.cpp - shader/transform_feedback.h surface.cpp surface.h texture_cache/accelerated_swizzle.cpp diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h deleted file mode 100644 index f46e81bb7..000000000 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include "common/bit_field.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" -#include "video_core/textures/texture.h" - -namespace Tegra::Engines { - -struct SamplerDescriptor { - union { - u32 raw = 0; - BitField<0, 2, Tegra::Shader::TextureType> texture_type; - BitField<2, 3, Tegra::Texture::ComponentType> r_type; - BitField<5, 1, u32> is_array; - BitField<6, 1, u32> is_buffer; - BitField<7, 1, u32> is_shadow; - BitField<8, 3, Tegra::Texture::ComponentType> g_type; - BitField<11, 3, Tegra::Texture::ComponentType> b_type; - BitField<14, 3, Tegra::Texture::ComponentType> a_type; - BitField<17, 7, Tegra::Texture::TextureFormat> format; - }; - - bool operator==(const SamplerDescriptor& rhs) const noexcept { - return raw == rhs.raw; - } - - bool operator!=(const SamplerDescriptor& rhs) const noexcept { - return !operator==(rhs); - } - - static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { - using Tegra::Shader::TextureType; - SamplerDescriptor result; - - result.format.Assign(tic.format.Value()); - result.r_type.Assign(tic.r_type.Value()); - result.g_type.Assign(tic.g_type.Value()); - result.b_type.Assign(tic.b_type.Value()); - result.a_type.Assign(tic.a_type.Value()); - - switch (tic.texture_type.Value()) { - case Tegra::Texture::TextureType::Texture1D: - result.texture_type.Assign(TextureType::Texture1D); - return result; - case Tegra::Texture::TextureType::Texture2D: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::Texture3D: - result.texture_type.Assign(TextureType::Texture3D); - return result; - case Tegra::Texture::TextureType::TextureCubemap: - result.texture_type.Assign(TextureType::TextureCube); - return result; - case Tegra::Texture::TextureType::Texture1DArray: - result.texture_type.Assign(TextureType::Texture1D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DArray: - result.texture_type.Assign(TextureType::Texture2D); - result.is_array.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture1DBuffer: - result.texture_type.Assign(TextureType::Texture1D); - result.is_buffer.Assign(1); - return result; - case Tegra::Texture::TextureType::Texture2DNoMipmap: - result.texture_type.Assign(TextureType::Texture2D); - return result; - case Tegra::Texture::TextureType::TextureCubeArray: - result.texture_type.Assign(TextureType::TextureCube); - result.is_array.Assign(1); - return result; - default: - result.texture_type.Assign(TextureType::Texture2D); - return result; - } - } -}; -static_assert(std::is_trivially_copyable_v); - -class ConstBufferEngineInterface { -public: - virtual ~ConstBufferEngineInterface() = default; - virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; - virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; - virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const = 0; - virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; - virtual u32 GetBoundBuffer() const = 0; - - virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; - virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; -}; - -} // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a9b75091e..cae93c470 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -57,53 +57,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun } } -u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& buffer = launch_description.const_buffer_config[const_buffer]; - u32 result; - std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); - return result; -} - -SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage == ShaderType::Compute); - const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; - return AccessSampler(memory_manager.Read(tex_info_address)); -} - -SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); - - const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; - LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); - - rasterizer->DispatchCompute(code_addr); + rasterizer->DispatchCompute(); } Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 7c40cba38..0d7683c2d 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -10,7 +10,6 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/engines/shader_type.h" @@ -40,7 +39,7 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) -class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { +class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); ~KeplerCompute(); @@ -209,23 +208,6 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - private: void ProcessLaunch(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index aab6b8f7a..103a51fd0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -670,42 +670,4 @@ void Maxwell3D::ProcessClearBuffers() { rasterizer->Clear(); } -u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader_stage = state.shader_stages[static_cast(stage)]; - const auto& buffer = shader_stage.const_buffers[const_buffer]; - return memory_manager.Read(buffer.address + offset); -} - -SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { - return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); -} - -SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const { - ASSERT(stage != ShaderType::Compute); - const auto& shader = state.shader_stages[static_cast(stage)]; - const auto& tex_info_buffer = shader.const_buffers[const_buffer]; - const GPUVAddr tex_info_address = tex_info_buffer.address + offset; - return AccessSampler(memory_manager.Read(tex_info_address)); -} - -SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { - const Texture::TextureHandle tex_handle{handle}; - const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); - const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id); - - SamplerDescriptor result = SamplerDescriptor::FromTIC(tic); - result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); - return result; -} - -VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { - return rasterizer->AccessGuestDriverProfile(); -} - -const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { - return rasterizer->AccessGuestDriverProfile(); -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 335383955..cbf94412b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -17,7 +17,6 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" @@ -49,7 +48,7 @@ namespace Tegra::Engines { #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) -class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { +class Maxwell3D final : public EngineInterface { public: explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); ~Maxwell3D(); @@ -1424,23 +1423,6 @@ public: void FlushMMEInlineDraw(); - u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; - - SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; - - SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, - u64 offset) const override; - - SamplerDescriptor AccessSampler(u32 handle) const override; - - u32 GetBoundBuffer() const override { - return regs.tex_cb_index; - } - - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; - - const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; - bool ShouldExecute() const { return execute_on; } diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp deleted file mode 100644 index f058f2744..000000000 --- a/src/video_core/guest_driver.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/guest_driver.h" - -namespace VideoCore { - -void GuestDriverProfile::DeduceTextureHandlerSize(std::vector bound_offsets) { - if (texture_handler_size) { - return; - } - const std::size_t size = bound_offsets.size(); - if (size < 2) { - return; - } - std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); - u32 min_val = std::numeric_limits::max(); - for (std::size_t i = 1; i < size; ++i) { - if (bound_offsets[i] == bound_offsets[i - 1]) { - continue; - } - const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; - min_val = std::min(min_val, new_min); - } - if (min_val > 2) { - return; - } - texture_handler_size = min_texture_handler_size * min_val; -} - -} // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h deleted file mode 100644 index 21e569ba1..000000000 --- a/src/video_core/guest_driver.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/common_types.h" - -namespace VideoCore { - -/** - * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect - * information necessary for impossible to avoid HLE methods like shader tracks as they are - * Entscheidungsproblems. - */ -class GuestDriverProfile { -public: - explicit GuestDriverProfile() = default; - explicit GuestDriverProfile(std::optional texture_handler_size_) - : texture_handler_size{texture_handler_size_} {} - - void DeduceTextureHandlerSize(std::vector bound_offsets); - - u32 GetTextureHandlerSize() const { - return texture_handler_size.value_or(default_texture_handler_size); - } - - bool IsTextureHandlerSizeKnown() const { - return texture_handler_size.has_value(); - } - -private: - // Minimum size of texture handler any driver can use. - static constexpr u32 min_texture_handler_size = 4; - - // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead. - // Thus, certain drivers may squish the size. - static constexpr u32 default_texture_handler_size = 8; - - std::optional texture_handler_size = default_texture_handler_size; -}; - -} // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 58014c1c3..b094fc064 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -11,7 +11,6 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" -#include "video_core/guest_driver.h" namespace Tegra { class MemoryManager; @@ -45,7 +44,7 @@ public: virtual void Clear() = 0; /// Dispatches a compute shader invocation - virtual void DispatchCompute(GPUVAddr code_addr) = 0; + virtual void DispatchCompute() = 0; /// Resets the counter of a query virtual void ResetCounter(QueryType type) = 0; @@ -136,18 +135,5 @@ public: /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const DiskResourceLoadCallback& callback) {} - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. - [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() { - return guest_driver_profile; - } - - /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. - [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const { - return guest_driver_profile; - } - -private: - GuestDriverProfile guest_driver_profile{}; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp deleted file mode 100644 index e8d8d2aa5..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ /dev/null @@ -1,2124 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -// Predicates in the decompiled code follow the convention that -1 means true and 0 means false. -// GLASM lacks booleans, so they have to be implemented as integers. -// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to -// select between two values, because -1 will be evaluated as true and 0 as false. - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; -using Operation = const OperationNode&; - -constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; - -char Swizzle(std::size_t component) { - static constexpr std::string_view SWIZZLE{"xyzw"}; - return SWIZZLE.at(component); -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast(index) - static_cast(Attribute::Index::Attribute_0); -} - -std::string_view Modifiers(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - if (meta && meta->precise) { - return ".PREC"; - } - return ""; -} - -std::string_view GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return ""; - case PixelImap::Constant: - return "FLAT "; - case PixelImap::ScreenLinear: - return "NOPERSPECTIVE "; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; -} - -std::string_view ImageType(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "BUFFER"; - case Tegra::Shader::ImageType::Texture1DArray: - return "ARRAY1D"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "ARRAY2D"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - } - UNREACHABLE(); - return {}; -} - -std::string_view StackName(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "SSY"; - case MetaStackClass::Pbk: - return "PBK"; - } - UNREACHABLE(); - return ""; -}; - -std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { - switch (topology) { - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: - return "POINTS"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: - return "LINES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: - return "LINES_ADJACENCY"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - return "TRIANGLES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - return "TRIANGLES_ADJACENCY"; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return "POINTS"; - } -} - -std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "POINTS"; - case Tegra::Shader::OutputTopology::LineStrip: - return "LINE_STRIP"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "TRIANGLE_STRIP"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -std::string_view StageInputName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - case ShaderType::Geometry: - return "vertex"; - case ShaderType::Fragment: - return "fragment"; - case ShaderType::Compute: - return "invocation"; - default: - UNREACHABLE(); - return ""; - } -} - -std::string TextureType(const MetaTexture& meta) { - if (meta.sampler.is_buffer) { - return "BUFFER"; - } - std::string type; - if (meta.sampler.is_shadow) { - type += "SHADOW"; - } - if (meta.sampler.is_array) { - type += "ARRAY"; - } - type += [&meta] { - switch (meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return "1D"; - case Tegra::Shader::TextureType::Texture2D: - return "2D"; - case Tegra::Shader::TextureType::Texture3D: - return "3D"; - case Tegra::Shader::TextureType::TextureCube: - return "CUBE"; - } - UNREACHABLE(); - return "2D"; - }(); - return type; -} - -class ARBDecompiler final { -public: - explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier); - - std::string Code() const { - return shader_source; - } - -private: - void DefineGlobalMemory(); - - void DeclareHeader(); - void DeclareVertex(); - void DeclareGeometry(); - void DeclareFragment(); - void DeclareCompute(); - void DeclareInputAttributes(); - void DeclareOutputAttributes(); - void DeclareLocalMemory(); - void DeclareGlobalMemory(); - void DeclareConstantBuffers(); - void DeclareRegisters(); - void DeclareTemporaries(); - void DeclarePredicates(); - void DeclareInternalFlags(); - - void InitializeVariables(); - - void DecompileAST(); - void DecompileBranchMode(); - - void VisitAST(const ASTNode& node); - std::string VisitExpression(const Expr& node); - - void VisitBlock(const NodeBlock& bb); - - std::string Visit(const Node& node); - - std::tuple BuildCoords(Operation); - std::string BuildAoffi(Operation); - std::string GlobalMemoryPointer(const GmemNode& gmem); - void Exit(); - - std::string Assign(Operation); - std::string Select(Operation); - std::string FClamp(Operation); - std::string FCastHalf0(Operation); - std::string FCastHalf1(Operation); - std::string FSqrt(Operation); - std::string FSwizzleAdd(Operation); - std::string HAdd2(Operation); - std::string HMul2(Operation); - std::string HFma2(Operation); - std::string HAbsolute(Operation); - std::string HNegate(Operation); - std::string HClamp(Operation); - std::string HCastFloat(Operation); - std::string HUnpack(Operation); - std::string HMergeF32(Operation); - std::string HMergeH0(Operation); - std::string HMergeH1(Operation); - std::string HPack2(Operation); - std::string LogicalAssign(Operation); - std::string LogicalPick2(Operation); - std::string LogicalAnd2(Operation); - std::string FloatOrdered(Operation); - std::string FloatUnordered(Operation); - std::string LogicalAddCarry(Operation); - std::string Texture(Operation); - std::string TextureGather(Operation); - std::string TextureQueryDimensions(Operation); - std::string TextureQueryLod(Operation); - std::string TexelFetch(Operation); - std::string TextureGradient(Operation); - std::string ImageLoad(Operation); - std::string ImageStore(Operation); - std::string Branch(Operation); - std::string BranchIndirect(Operation); - std::string PushFlowStack(Operation); - std::string PopFlowStack(Operation); - std::string Exit(Operation); - std::string Discard(Operation); - std::string EmitVertex(Operation); - std::string EndPrimitive(Operation); - std::string InvocationId(Operation); - std::string YNegate(Operation); - std::string ThreadId(Operation); - std::string ShuffleIndexed(Operation); - std::string Barrier(Operation); - std::string MemoryBarrierGroup(Operation); - std::string MemoryBarrierGlobal(Operation); - - template - std::string Unary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); - return temporary; - } - - template - std::string Binary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1])); - return temporary; - } - - template - std::string Trinary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1]), Visit(operation[2])); - return temporary; - } - - template - std::string FloatComparison(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("TRUNC.U.CC RC.x, {};", Binary(operation)); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NE.x), -1;", temporary); - - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - if constexpr (unordered) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - } else if (op == SNE_F) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - } - return temporary; - } - - template - std::string HalfComparison(Operation operation) { - std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - AddLine("UP2H.F {}, {};", tmp1, op_a); - AddLine("UP2H.F {}, {};", tmp2, op_b); - AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); - AddLine("TRUNC.U.CC RC.xy, {};", tmp1); - AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); - AddLine("MOV.S {}.x (NE.x), -1;", tmp1); - AddLine("MOV.S {}.y (NE.y), -1;", tmp1); - if constexpr (is_nan) { - AddLine("MOVC.F RC.x, {};", op_a); - AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); - AddLine("MOVC.F RC.x, {};", op_b); - AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); - } - return tmp1; - } - - template - std::string AtomicImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - - AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, - image_id, ImageType(meta.image.type)); - return fmt::format("{}.x", coord); - } - - template - std::string Atomic(Operation operation) { - std::string temporary = AllocTemporary(); - std::string address; - std::string_view opname; - bool robust = false; - if (const auto gmem = std::get_if(&*operation[0])) { - address = GlobalMemoryPointer(*gmem); - opname = "ATOM"; - robust = true; - } else if (const auto smem = std::get_if(&*operation[0])) { - address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); - opname = "ATOMS"; - } else { - UNREACHABLE(); - return "{0, 0, 0, 0}"; - } - if (robust) { - AddLine("IF NE.x;"); - } - AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); - if (robust) { - AddLine("ELSE;"); - AddLine("MOV.S {}, 0;", temporary); - AddLine("ENDIF;"); - } - return temporary; - } - - template - std::string Negate(Operation operation) { - std::string temporary = AllocTemporary(); - if constexpr (type == 'F') { - AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); - } else { - AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); - } - return temporary; - } - - template - std::string Absolute(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); - return temporary; - } - - template - std::string BitfieldInsert(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); - AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), - Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template - std::string BitfieldExtract(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); - AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template - std::string LocalInvocationId(Operation) { - return fmt::format("invocation.localid.{}", swizzle); - } - - template - std::string WorkGroupId(Operation) { - return fmt::format("invocation.groupid.{}", swizzle); - } - - template - std::string ThreadMask(Operation) { - return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); - } - - template - void AddExpression(std::string_view text, Args&&... args) { - shader_source += fmt::format(fmt::runtime(text), std::forward(args)...); - } - - template - void AddLine(std::string_view text, Args&&... args) { - AddExpression(text, std::forward(args)...); - shader_source += '\n'; - } - - std::string AllocLongVectorTemporary() { - max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); - return fmt::format("L{}", num_long_temporaries++); - } - - std::string AllocLongTemporary() { - return fmt::format("{}.x", AllocLongVectorTemporary()); - } - - std::string AllocVectorTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}", num_temporaries++); - } - - std::string AllocTemporary() { - return fmt::format("{}.x", AllocVectorTemporary()); - } - - void ResetTemporaries() noexcept { - num_temporaries = 0; - num_long_temporaries = 0; - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - - std::size_t num_temporaries = 0; - std::size_t max_temporaries = 0; - - std::size_t num_long_temporaries = 0; - std::size_t max_long_temporaries = 0; - - std::map global_memory_names; - - std::string shader_source; - - static constexpr std::string_view ADD_F32 = "ADD.F32"; - static constexpr std::string_view ADD_S = "ADD.S"; - static constexpr std::string_view ADD_U = "ADD.U"; - static constexpr std::string_view MUL_F32 = "MUL.F32"; - static constexpr std::string_view MUL_S = "MUL.S"; - static constexpr std::string_view MUL_U = "MUL.U"; - static constexpr std::string_view DIV_F32 = "DIV.F32"; - static constexpr std::string_view DIV_S = "DIV.S"; - static constexpr std::string_view DIV_U = "DIV.U"; - static constexpr std::string_view MAD_F32 = "MAD.F32"; - static constexpr std::string_view RSQ_F32 = "RSQ.F32"; - static constexpr std::string_view COS_F32 = "COS.F32"; - static constexpr std::string_view SIN_F32 = "SIN.F32"; - static constexpr std::string_view EX2_F32 = "EX2.F32"; - static constexpr std::string_view LG2_F32 = "LG2.F32"; - static constexpr std::string_view SLT_F = "SLT.F32"; - static constexpr std::string_view SLT_S = "SLT.S"; - static constexpr std::string_view SLT_U = "SLT.U"; - static constexpr std::string_view SEQ_F = "SEQ.F32"; - static constexpr std::string_view SEQ_S = "SEQ.S"; - static constexpr std::string_view SEQ_U = "SEQ.U"; - static constexpr std::string_view SLE_F = "SLE.F32"; - static constexpr std::string_view SLE_S = "SLE.S"; - static constexpr std::string_view SLE_U = "SLE.U"; - static constexpr std::string_view SGT_F = "SGT.F32"; - static constexpr std::string_view SGT_S = "SGT.S"; - static constexpr std::string_view SGT_U = "SGT.U"; - static constexpr std::string_view SNE_F = "SNE.F32"; - static constexpr std::string_view SNE_S = "SNE.S"; - static constexpr std::string_view SNE_U = "SNE.U"; - static constexpr std::string_view SGE_F = "SGE.F32"; - static constexpr std::string_view SGE_S = "SGE.S"; - static constexpr std::string_view SGE_U = "SGE.U"; - static constexpr std::string_view AND_S = "AND.S"; - static constexpr std::string_view AND_U = "AND.U"; - static constexpr std::string_view TRUNC_F = "TRUNC.F"; - static constexpr std::string_view TRUNC_S = "TRUNC.S"; - static constexpr std::string_view TRUNC_U = "TRUNC.U"; - static constexpr std::string_view SHL_S = "SHL.S"; - static constexpr std::string_view SHL_U = "SHL.U"; - static constexpr std::string_view SHR_S = "SHR.S"; - static constexpr std::string_view SHR_U = "SHR.U"; - static constexpr std::string_view OR_S = "OR.S"; - static constexpr std::string_view OR_U = "OR.U"; - static constexpr std::string_view XOR_S = "XOR.S"; - static constexpr std::string_view XOR_U = "XOR.U"; - static constexpr std::string_view NOT_S = "NOT.S"; - static constexpr std::string_view NOT_U = "NOT.U"; - static constexpr std::string_view BTC_S = "BTC.S"; - static constexpr std::string_view BTC_U = "BTC.U"; - static constexpr std::string_view BTFM_S = "BTFM.S"; - static constexpr std::string_view BTFM_U = "BTFM.U"; - static constexpr std::string_view ROUND_F = "ROUND.F"; - static constexpr std::string_view CEIL_F = "CEIL.F"; - static constexpr std::string_view FLR_F = "FLR.F"; - static constexpr std::string_view I2F_S = "I2F.S"; - static constexpr std::string_view I2F_U = "I2F.U"; - static constexpr std::string_view MIN_F = "MIN.F"; - static constexpr std::string_view MIN_S = "MIN.S"; - static constexpr std::string_view MIN_U = "MIN.U"; - static constexpr std::string_view MAX_F = "MAX.F"; - static constexpr std::string_view MAX_S = "MAX.S"; - static constexpr std::string_view MAX_U = "MAX.U"; - static constexpr std::string_view MOV_U = "MOV.U"; - static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; - static constexpr std::string_view TGALL_U = "TGALL.U"; - static constexpr std::string_view TGANY_U = "TGANY.U"; - static constexpr std::string_view TGEQ_U = "TGEQ.U"; - static constexpr std::string_view EXCH = "EXCH"; - static constexpr std::string_view ADD = "ADD"; - static constexpr std::string_view MIN = "MIN"; - static constexpr std::string_view MAX = "MAX"; - static constexpr std::string_view AND = "AND"; - static constexpr std::string_view OR = "OR"; - static constexpr std::string_view XOR = "XOR"; - static constexpr std::string_view U32 = "U32"; - static constexpr std::string_view S32 = "S32"; - - static constexpr std::size_t NUM_ENTRIES = static_cast(OperationCode::Amount); - using DecompilerType = std::string (ARBDecompiler::*)(Operation); - static constexpr std::array OPERATION_DECOMPILERS = { - &ARBDecompiler::Assign, - - &ARBDecompiler::Select, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Trinary, - &ARBDecompiler::Negate<'F'>, - &ARBDecompiler::Absolute<'F'>, - &ARBDecompiler::FClamp, - &ARBDecompiler::FCastHalf0, - &ARBDecompiler::FCastHalf1, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::FSqrt, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::FSwizzleAdd, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Negate<'S'>, - &ARBDecompiler::Absolute<'S'>, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::BitfieldInsert<'S'>, - &ARBDecompiler::BitfieldExtract<'S'>, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::BitfieldInsert<'U'>, - &ARBDecompiler::BitfieldExtract<'U'>, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::HAdd2, - &ARBDecompiler::HMul2, - &ARBDecompiler::HFma2, - &ARBDecompiler::HAbsolute, - &ARBDecompiler::HNegate, - &ARBDecompiler::HClamp, - &ARBDecompiler::HCastFloat, - &ARBDecompiler::HUnpack, - &ARBDecompiler::HMergeF32, - &ARBDecompiler::HMergeH0, - &ARBDecompiler::HMergeH1, - &ARBDecompiler::HPack2, - - &ARBDecompiler::LogicalAssign, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::LogicalPick2, - &ARBDecompiler::LogicalAnd2, - - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatOrdered, - &ARBDecompiler::FloatUnordered, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::LogicalAddCarry, - - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - - &ARBDecompiler::Texture, - &ARBDecompiler::Texture, - &ARBDecompiler::TextureGather, - &ARBDecompiler::TextureQueryDimensions, - &ARBDecompiler::TextureQueryLod, - &ARBDecompiler::TexelFetch, - &ARBDecompiler::TextureGradient, - - &ARBDecompiler::ImageLoad, - &ARBDecompiler::ImageStore, - - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Branch, - &ARBDecompiler::BranchIndirect, - &ARBDecompiler::PushFlowStack, - &ARBDecompiler::PopFlowStack, - &ARBDecompiler::Exit, - &ARBDecompiler::Discard, - - &ARBDecompiler::EmitVertex, - &ARBDecompiler::EndPrimitive, - - &ARBDecompiler::InvocationId, - &ARBDecompiler::YNegate, - &ARBDecompiler::LocalInvocationId<'x'>, - &ARBDecompiler::LocalInvocationId<'y'>, - &ARBDecompiler::LocalInvocationId<'z'>, - &ARBDecompiler::WorkGroupId<'x'>, - &ARBDecompiler::WorkGroupId<'y'>, - &ARBDecompiler::WorkGroupId<'z'>, - - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::ThreadId, - &ARBDecompiler::ThreadMask<'e', 'q'>, - &ARBDecompiler::ThreadMask<'g', 'e'>, - &ARBDecompiler::ThreadMask<'g', 't'>, - &ARBDecompiler::ThreadMask<'l', 'e'>, - &ARBDecompiler::ThreadMask<'l', 't'>, - &ARBDecompiler::ShuffleIndexed, - - &ARBDecompiler::Barrier, - &ARBDecompiler::MemoryBarrierGroup, - &ARBDecompiler::MemoryBarrierGlobal, - }; -}; - -ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { - DefineGlobalMemory(); - - AddLine("TEMP RC;"); - AddLine("TEMP FSWZA[4];"); - AddLine("TEMP FSWZB[4];"); - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - AddLine("END"); - - const std::string code = std::move(shader_source); - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareLocalMemory(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareRegisters(); - DeclareTemporaries(); - DeclarePredicates(); - DeclareInternalFlags(); - - shader_source += code; -} - -std::string_view HeaderStageName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - return "vp"; - case ShaderType::Geometry: - return "gp"; - case ShaderType::Fragment: - return "fp"; - case ShaderType::Compute: - return "cp"; - default: - UNREACHABLE(); - return ""; - } -} - -void ARBDecompiler::DefineGlobalMemory() { - u32 binding = 0; - for (const auto& pair : ir.GetGlobalMemory()) { - const GlobalMemoryBase base = pair.first; - global_memory_names.emplace(base, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareHeader() { - AddLine("!!NV{}5.0", HeaderStageName(stage)); - // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D - AddLine("OPTION NV_internal;"); - AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_thread_group;"); - if (ir.UsesWarps() && device.HasWarpIntrinsics()) { - AddLine("OPTION NV_shader_thread_shuffle;"); - } - if (stage == ShaderType::Vertex) { - if (device.HasNvViewportArray2()) { - AddLine("OPTION NV_viewport_array2;"); - } - } - if (stage == ShaderType::Fragment) { - AddLine("OPTION ARB_draw_buffers;"); - } - if (device.HasImageLoadFormatted()) { - AddLine("OPTION EXT_shader_image_load_formatted;"); - } -} - -void ARBDecompiler::DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); -} - -void ARBDecompiler::DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const auto& header = ir.GetHeader(); - AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); - AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); - AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); - AddLine("ATTRIB vertex_position = vertex.position;"); -} - -void ARBDecompiler::DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - AddLine("OUTPUT result_color7 = result.color[7];"); - AddLine("OUTPUT result_color6 = result.color[6];"); - AddLine("OUTPUT result_color5 = result.color[5];"); - AddLine("OUTPUT result_color4 = result.color[4];"); - AddLine("OUTPUT result_color3 = result.color[3];"); - AddLine("OUTPUT result_color2 = result.color[2];"); - AddLine("OUTPUT result_color1 = result.color[1];"); - AddLine("OUTPUT result_color0 = result.color;"); -} - -void ARBDecompiler::DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const ComputeInfo& info = registry.GetComputeInfo(); - AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], - info.workgroup_size[2]); - if (info.shared_memory_size_in_words == 0) { - return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - u32 size_in_bytes = info.shared_memory_size_in_words * 4; - if (size_in_bytes > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size_in_bytes, limit); - size_in_bytes = limit; - } - - AddLine("SHARED_MEMORY {};", size_in_bytes); - AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); -} - -void ARBDecompiler::DeclareInputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - const std::string_view stage_name = StageInputName(stage); - for (const auto attribute : ir.GetInputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - - std::string_view suffix; - if (stage == ShaderType::Fragment) { - const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix = GetInputFlags(input_mode); - } - AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, - index); - } -} - -void ARBDecompiler::DeclareOutputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); - } -} - -void ARBDecompiler::DeclareLocalMemory() { - u64 size = 0; - if (stage == ShaderType::Compute) { - size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - size = ir.GetHeader().GetLocalMemorySize(); - } - if (size == 0) { - return; - } - const u64 element_count = Common::AlignUp(size, 4) / 4; - AddLine("TEMP lmem[{}];", element_count); -} - -void ARBDecompiler::DeclareGlobalMemory() { - const size_t num_entries = ir.GetGlobalMemory().size(); - if (num_entries > 0) { - AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); - } -} - -void ARBDecompiler::DeclareConstantBuffers() { - u32 binding = 0; - for (const auto& cbuf : ir.GetConstantBuffers()) { - AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - AddLine("TEMP R{};", gpr); - } -} - -void ARBDecompiler::DeclareTemporaries() { - for (std::size_t i = 0; i < max_temporaries; ++i) { - AddLine("TEMP T{};", i); - } - for (std::size_t i = 0; i < max_long_temporaries; ++i) { - AddLine("LONG TEMP L{};", i); - } -} - -void ARBDecompiler::DeclarePredicates() { - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("TEMP P{};", static_cast(pred)); - } -} - -void ARBDecompiler::DeclareInternalFlags() { - for (const char* name : INTERNAL_FLAG_NAMES) { - AddLine("TEMP {};", name); - } -} - -void ARBDecompiler::InitializeVariables() { - AddLine("MOV.F32 FSWZA[0], -1;"); - AddLine("MOV.F32 FSWZA[1], 1;"); - AddLine("MOV.F32 FSWZA[2], -1;"); - AddLine("MOV.F32 FSWZA[3], 0;"); - AddLine("MOV.F32 FSWZB[0], -1;"); - AddLine("MOV.F32 FSWZB[1], -1;"); - AddLine("MOV.F32 FSWZB[2], 1;"); - AddLine("MOV.F32 FSWZB[3], -1;"); - - if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { - AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); - } - for (const u32 gpr : ir.GetRegisters()) { - AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); - } - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast(pred)); - } -} - -void ARBDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("TEMP F{};", i); - } - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); - } - - InitializeVariables(); - - VisitAST(ir.GetASTProgram()); -} - -void ARBDecompiler::DecompileBranchMode() { - static constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); - AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); - AddLine("TEMP SSY_TOP;"); - AddLine("TEMP PBK_TOP;"); - } - - AddLine("TEMP PC;"); - - if (!ir.IsFlowStackDisabled()) { - AddLine("MOV.U SSY_TOP.x, 0;"); - AddLine("MOV.U PBK_TOP.x, 0;"); - } - - InitializeVariables(); - - const auto basic_block_end = ir.GetBasicBlocks().end(); - auto basic_block_it = ir.GetBasicBlocks().begin(); - const u32 first_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", first_address); - - AddLine("REP;"); - - std::size_t num_blocks = 0; - while (basic_block_it != basic_block_end) { - const auto& [address, bb] = *basic_block_it; - ++num_blocks; - - AddLine("SEQ.S.CC RC.x, PC.x, {};", address); - AddLine("IF NE.x;"); - - VisitBlock(bb); - - ++basic_block_it; - - if (basic_block_it != basic_block_end) { - const auto op = std::get_if(&*bb[bb.size() - 1]); - if (!op || op->GetCode() != OperationCode::Branch) { - const u32 next_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", next_address); - AddLine("CONT;"); - } - } - - AddLine("ELSE;"); - } - AddLine("RET;"); - while (num_blocks--) { - AddLine("ENDIF;"); - } - - AddLine("ENDREP;"); -} - -void ARBDecompiler::VisitAST(const ASTNode& node) { - if (const auto ast = std::get_if(&*node->GetInnerData())) { - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto if_then = std::get_if(&*node->GetInnerData())) { - const std::string condition = VisitExpression(if_then->condition); - ResetTemporaries(); - - AddLine("MOVC.U RC.x, {};", condition); - AddLine("IF NE.x;"); - for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("ENDIF;"); - } else if (const auto if_else = std::get_if(&*node->GetInnerData())) { - AddLine("ELSE;"); - for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto decoded = std::get_if(&*node->GetInnerData())) { - VisitBlock(decoded->nodes); - } else if (const auto var_set = std::get_if(&*node->GetInnerData())) { - AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); - ResetTemporaries(); - } else if (const auto do_while = std::get_if(&*node->GetInnerData())) { - const std::string condition = VisitExpression(do_while->condition); - ResetTemporaries(); - AddLine("REP;"); - for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("MOVC.U RC.x, {};", condition); - AddLine("BRK (NE.x);"); - AddLine("ENDREP;"); - } else if (const auto ast_return = std::get_if(&*node->GetInnerData())) { - const bool is_true = ExprIsTrue(ast_return->condition); - if (!is_true) { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); - AddLine("IF NE.x;"); - ResetTemporaries(); - } - if (ast_return->kills) { - AddLine("KIL TR;"); - } else { - Exit(); - } - if (!is_true) { - AddLine("ENDIF;"); - } - } else if (const auto ast_break = std::get_if(&*node->GetInnerData())) { - if (ExprIsTrue(ast_break->condition)) { - AddLine("BRK;"); - } else { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); - AddLine("BRK (NE.x);"); - ResetTemporaries(); - } - } else if (std::holds_alternative(*node->GetInnerData())) { - // Nothing to do - } else { - UNREACHABLE(); - } -} - -std::string ARBDecompiler::VisitExpression(const Expr& node) { - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - return fmt::format("P{}.x", static_cast(expr->predicate)); - } - if (const auto expr = std::get_if(&*node)) { - return Visit(ir.GetConditionCode(expr->cc)); - } - if (const auto expr = std::get_if(&*node)) { - return fmt::format("F{}.x", expr->var_index); - } - if (const auto expr = std::get_if(&*node)) { - return expr->value ? "0xffffffff" : "0"; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); - return result; - } - UNREACHABLE(); - return "0"; -} - -void ARBDecompiler::VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } -} - -std::string ARBDecompiler::Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - const std::size_t index = static_cast(operation->GetCode()); - if (index >= OPERATION_DECOMPILERS.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", index); - return {}; - } - const auto decompiler = OPERATION_DECOMPILERS[index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return "{0, 0, 0, 0}.x"; - } - return fmt::format("R{}.x", index); - } - - if (const auto cv = std::get_if(&*node)) { - return fmt::format("CV{}.x", cv->GetIndex()); - } - - if (const auto immediate = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); - return temporary; - } - - if (const auto predicate = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - AddLine("MOV.S {}, -1;", temporary); - break; - case Tegra::Shader::Pred::NeverExecute: - AddLine("MOV.S {}, 0;", temporary); - break; - default: - AddLine("MOV.S {}, P{}.x;", temporary, static_cast(index)); - break; - } - if (predicate->IsNegated()) { - AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); - } - return temporary; - } - - if (const auto abuf = std::get_if(&*node)) { - if (abuf->IsPhysicalBuffer()) { - UNIMPLEMENTED_MSG("Physical buffers are not implemented"); - return "{0, 0, 0, 0}.x"; - } - - const Attribute::Index index = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (index) { - case Attribute::Index::Position: { - if (stage == ShaderType::Geometry) { - return fmt::format("{}_position[{}].{}", StageInputName(stage), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.position.{}", StageInputName(stage), swizzle); - } - } - case Attribute::Index::TessCoordInstanceIDVertexID: - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - return "vertex.instance"; - case 3: - return "vertex.id"; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - break; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return "fragment.pointcoord.x"; - case 1: - return "fragment.pointcoord.y"; - } - UNIMPLEMENTED(); - break; - case Attribute::Index::FrontFacing: { - ASSERT(stage == ShaderType::Fragment); - ASSERT(element == 3); - const std::string temporary = AllocVectorTemporary(); - AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); - AddLine("MOV.U.CC RC.x, -RC;"); - AddLine("MOV.S {}.x, 0;", temporary); - AddLine("MOV.S {}.x (NE.x), -1;", temporary); - return fmt::format("{}.x", temporary); - } - default: - if (IsGenericAttribute(index)) { - if (stage == ShaderType::Geometry) { - return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.attrib[{}].{}", StageInputName(stage), - GetGenericAttributeIndex(index), swizzle); - } - } - UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); - break; - } - return "{0, 0, 0, 0}.x"; - } - - if (const auto cbuf = std::get_if(&*node)) { - std::string offset_string; - const auto& offset = cbuf->GetOffset(); - if (const auto imm = std::get_if(&*offset)) { - offset_string = std::to_string(imm->GetValue()); - } else { - offset_string = Visit(offset); - } - std::string temporary = AllocTemporary(); - AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); - return temporary; - } - - if (const auto gmem = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV {}, 0;", temporary); - AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); - return temporary; - } - - if (const auto lmem = std::get_if(&*node)) { - std::string temporary = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", temporary, temporary); - AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); - return temporary; - } - - if (const auto smem = std::get_if(&*node)) { - std::string temporary = Visit(smem->GetAddress()); - AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); - return temporary; - } - - if (const auto internal_flag = std::get_if(&*node)) { - const std::size_t index = static_cast(internal_flag->GetFlag()); - return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); - AddLine("IF NE.x;"); - VisitBlock(conditional->GetCode()); - AddLine("ENDIF;"); - return {}; - } - - if ([[maybe_unused]] const auto cmt = std::get_if(&*node)) { - // Uncommenting this will generate invalid code. GLASM lacks comments. - // AddLine("// {}", cmt->GetText()); - return {}; - } - - UNIMPLEMENTED(); - return {}; -} - -std::tuple ARBDecompiler::BuildCoords(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.sampler.is_indexed); - - const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && - meta.sampler.type == Tegra::Shader::TextureType::TextureCube; - const std::size_t count = operation.GetOperandsCount(); - std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - if (meta.sampler.is_array) { - AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); - ++i; - } - if (meta.sampler.is_shadow) { - std::string compare = Visit(meta.depth_compare); - if (is_extended) { - ASSERT(i == 4); - std::string extra_coord = AllocVectorTemporary(); - AddLine("MOV.F {}.x, {};", extra_coord, compare); - return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; - } - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); - ++i; - } - return {temporary, temporary, i}; -} - -std::string ARBDecompiler::BuildAoffi(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - if (meta.aoffi.empty()) { - return {}; - } - const std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (auto& node : meta.aoffi) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); - } - return fmt::format(", offset({})", temporary); -} - -std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { - // Read a bindless SSBO, return its address and set CC accordingly - // address = c[binding].xy - // length = c[binding].z - const u32 binding = global_memory_names.at(gmem.GetDescriptor()); - - const std::string pointer = AllocLongVectorTemporary(); - std::string temporary = AllocTemporary(); - - AddLine("PK64.U {}, c[{}];", pointer, binding); - AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), - Visit(gmem.GetBaseAddress())); - AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); - AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); - // Compare offset to length and set CC - AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); - return fmt::format("{}.x", pointer); -} - -void ARBDecompiler::Exit() { - if (stage != ShaderType::Fragment) { - AddLine("RET;"); - return; - } - - const auto safe_get_register = [this](u32 reg) -> std::string { - if (ir.GetRegisters().contains(reg)) { - return fmt::format("R{}.x", reg); - } - return "{0, 0, 0, 0}.x"; - }; - - const auto& header = ir.GetHeader(); - u32 current_reg = 0; - for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), - safe_get_register(current_reg)); - ++current_reg; - } - } - if (header.ps.omap.depth) { - AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); - } - - AddLine("RET;"); -} - -std::string ARBDecompiler::Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string dest_name; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op - return {}; - } - dest_name = fmt::format("R{}.x", gpr->GetIndex()); - } else if (const auto abuf = std::get_if(&*dest)) { - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (const Attribute::Index index = abuf->GetIndex()) { - case Attribute::Index::Position: - dest_name = fmt::format("result.position.{}", swizzle); - break; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return {}; - case 1: - case 2: - if (!device.HasNvViewportArray2()) { - LOG_ERROR( - Render_OpenGL, - "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); - return {}; - } - dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; - break; - case 3: - dest_name = "result.pointsize.x"; - break; - } - break; - case Attribute::Index::ClipDistances0123: - dest_name = fmt::format("result.clip[{}].x", element); - break; - case Attribute::Index::ClipDistances4567: - dest_name = fmt::format("result.clip[{}].x", element + 4); - break; - default: - if (!IsGenericAttribute(index)) { - UNREACHABLE(); - return {}; - } - dest_name = - fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); - break; - } - } else if (const auto lmem = std::get_if(&*dest)) { - const std::string address = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", address, address); - dest_name = fmt::format("lmem[{}].x", address); - } else if (const auto smem = std::get_if(&*dest)) { - AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); - ResetTemporaries(); - return {}; - } else if (const auto gmem = std::get_if(&*dest)) { - AddLine("IF NE.x;"); - AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); - AddLine("ENDIF;"); - ResetTemporaries(); - return {}; - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", dest_name, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::Select(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), - Visit(operation[2])); - return temporary; -} - -std::string ARBDecompiler::FClamp(Operation operation) { - // 1.0f in hex, replace with std::bit_cast on C++20 - static constexpr u32 POSITIVE_ONE = 0x3f800000; - - std::string temporary = AllocTemporary(); - const Node& value = operation[0]; - const Node& low = operation[1]; - const Node& high = operation[2]; - const auto* const imm_low = std::get_if(&*low); - const auto* const imm_high = std::get_if(&*high); - if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { - AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); - } else { - AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); - AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); - } - return temporary; -} - -std::string ARBDecompiler::FCastHalf0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FCastHalf1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); - AddLine("MOV {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FSqrt(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); - AddLine("RCP.F32 {}, {};", temporary, temporary); - return temporary; -} - -std::string ARBDecompiler::FSwizzleAdd(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); - return fmt::format("{}.x", temporary); - } - - AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); - AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); - AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); - AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); - AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); - AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); - AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HAdd2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HMul2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HFma2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string tmp3 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); - AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HAbsolute(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HNegate(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("MOVC.S RC.x, {};", Visit(operation[1])); - AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); - AddLine("MOVC.S RC.x, {};", Visit(operation[2])); - AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HClamp(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HCastFloat(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); - AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HUnpack(Operation operation) { - std::string operand = Visit(operation[0]); - switch (std::get(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H0_H0: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H1_H1: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - } - UNREACHABLE(); - return "{0, 0, 0, 0}.x"; -} - -std::string ARBDecompiler::HMergeF32(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.x, {}.z;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.w;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HPack2(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); - AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const Tegra::Shader::Pred index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = fmt::format("P{}.x", static_cast(index)); - } else if (const auto internal_flag = std::get_if(&*dest)) { - const std::size_t index = static_cast(internal_flag->GetFlag()); - target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", target, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::LogicalPick2(Operation operation) { - std::string temporary = AllocTemporary(); - const u32 index = std::get(*operation[1]).GetValue(); - AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); - return temporary; -} - -std::string ARBDecompiler::LogicalAnd2(Operation operation) { - std::string temporary = AllocTemporary(); - const std::string op = Visit(operation[0]); - AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); - return temporary; -} - -std::string ARBDecompiler::FloatOrdered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S {}, -1;", temporary); - AddLine("MOV.S {} (NAN.x), 0;", temporary); - AddLine("MOV.S {} (NAN.y), 0;", temporary); - return temporary; -} - -std::string ARBDecompiler::FloatUnordered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NAN.x), -1;", temporary); - AddLine("MOV.S {} (NAN.y), -1;", temporary); - return temporary; -} - -std::string ARBDecompiler::LogicalAddCarry(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("IF CF.x;"); - AddLine("MOV.S {}, -1;", temporary); - AddLine("ENDIF;"); - return temporary; -} - -std::string ARBDecompiler::Texture(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string_view opcode = "TEX"; - std::string extra; - if (meta.bias) { - ASSERT(!meta.lod); - opcode = "TXB"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); - } else { - const std::string bias = AllocTemporary(); - AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); - extra = fmt::format(" {},", bias); - } - } - if (meta.lod) { - ASSERT(!meta.bias); - opcode = "TXL"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } else { - const std::string lod = AllocTemporary(); - AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); - extra = fmt::format(" {},", lod); - } - } - - AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string comp; - if (!meta.sampler.is_shadow) { - const auto& immediate = std::get(*meta.component); - comp = fmt::format(".{}", Swizzle(immediate.GetValue())); - } - - AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; - AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::size_t count = operation.GetOperandsCount(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); - AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); - AddLine("TRUNC.S {}, {};", temporary, temporary); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TexelFetch(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - if (!meta.sampler.is_buffer) { - ASSERT(swizzle < 4); - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } - AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), - BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const std::string ddx = AllocVectorTemporary(); - const std::string ddy = AllocVectorTemporary(); - const std::string coord = std::get<1>(BuildCoords(operation)); - - const std::size_t num_components = meta.derivates.size() / 2; - for (std::size_t index = 0; index < num_components; ++index) { - const char swizzle = Swizzle(index); - AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); - AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); - } - - const std::string_view result = coord; - AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); - return fmt::format("{}.x", result); -} - -std::string ARBDecompiler::ImageLoad(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t count = operation.GetOperandsCount(); - const std::string_view type = ImageType(meta.image.type); - - const std::string temporary = AllocVectorTemporary(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); - AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::ImageStore(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - const std::string_view type = ImageType(meta.image.type); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); - return {}; -} - -std::string ARBDecompiler::Branch(Operation operation) { - const auto target = std::get(*operation[0]); - AddLine("MOV.U PC.x, {};", target.GetValue()); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::BranchIndirect(Operation operation) { - AddLine("MOV.U PC.x, {};", Visit(operation[0])); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::PushFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const u32 target = std::get(*operation[0]).GetValue(); - const std::string_view stack_name = StackName(stack); - AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); - AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - return {}; -} - -std::string ARBDecompiler::PopFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const std::string_view stack_name = StackName(stack); - AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::Exit(Operation) { - Exit(); - return {}; -} - -std::string ARBDecompiler::Discard(Operation) { - AddLine("KIL TR;"); - return {}; -} - -std::string ARBDecompiler::EmitVertex(Operation) { - AddLine("EMIT;"); - return {}; -} - -std::string ARBDecompiler::EndPrimitive(Operation) { - AddLine("ENDPRIM;"); - return {}; -} - -std::string ARBDecompiler::InvocationId(Operation) { - return "primitive.invocation"; -} - -std::string ARBDecompiler::YNegate(Operation) { - LOG_WARNING(Render_OpenGL, "(STUBBED)"); - std::string temporary = AllocTemporary(); - AddLine("MOV.F {}, 1;", temporary); - return temporary; -} - -std::string ARBDecompiler::ThreadId(Operation) { - return fmt::format("{}.threadid", StageInputName(stage)); -} - -std::string ARBDecompiler::ShuffleIndexed(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - return Visit(operation[0]); - } - const std::string temporary = AllocVectorTemporary(); - AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), - Visit(operation[1])); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::Barrier(Operation) { - AddLine("BAR;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGroup(Operation) { - AddLine("MEMBAR.CTA;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { - AddLine("MEMBAR;"); - return {}; -} - -} // Anonymous namespace - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier) { - return ARBDecompiler(device, ir, registry, stage, identifier).Code(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h deleted file mode 100644 index 6afc87220..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/common_types.h" - -namespace Tegra::Engines { -enum class ShaderType : u32; -} - -namespace VideoCommon::Shader { -class ShaderIR; -class Registry; -} // namespace VideoCommon::Shader - -namespace OpenGL { - -class Device; - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ceb3abcb2..3551dbdcc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -54,40 +54,6 @@ namespace { constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -template -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - ShaderType shader_type, size_t index = 0) { - if constexpr (std::is_same_v) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -119,44 +85,6 @@ std::pair TransformFeedbackEnum(u8 location) { void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::Buffer; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), - async_shaders(emu_window_) { - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } -} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() { } } -void RasterizerOpenGL::SetupShaders(bool is_indexed) { - u32 clip_distances = 0; - - std::array shaders{}; - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeGraphicsDescriptors(); - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = maxwell3d.regs.shader_config[index]; - const auto program{static_cast(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - switch (program) { - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(0); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(0); - break; - default: - break; - } - continue; - } - // Currently this stages are not supported in the OpenGL backend. - // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl || - program == Maxwell::ShaderProgram::TesselationEval) { - continue; - } - - Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; - switch (program) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - program_manager.UseVertexShader(program_handle); - break; - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(program_handle); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(program_handle); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, - shader_config.enable.Value(), shader_config.offset); - break; - } - - // Stage indices are 0 - 5 - const size_t stage = index == 0 ? 0 : index - 1; - shaders[stage] = shader; - - SetupDrawTextures(shader, stage); - SetupDrawImages(shader, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); - - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : shader->GetEntries().global_memory_entries) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - - // Workaround for Intel drivers. - // When a clip distance is enabled but not set in the shader it crops parts of the screen - // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the - // clip distances only when it's written by a shader stage. - clip_distances |= shader->GetEntries().clip_distances; - - // When VertexA is enabled, we have dual vertex shaders - if (program == Maxwell::ShaderProgram::VertexA) { - // VertexB was combined with VertexA, so we skip the VertexB iteration - ++index; - } - } - SyncClipEnabled(clip_distances); - maxwell3d.dirty.flags[Dirty::Shaders] = false; - - buffer_cache.UpdateGraphicsBuffers(is_indexed); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader* const shader = shaders[stage]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - const auto& base = device.GetBaseBindings(stage); - BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, - texture_index, image_index); - } -} - void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - shader_cache.LoadDiskCache(title_id, stop_loading, callback); -} + const VideoCore::DiskResourceLoadCallback& callback) {} void RasterizerOpenGL::Clear() { MICROPROFILE_SCOPE(OpenGL_Clears); @@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Setup shaders and their used resources. std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - SetupShaders(is_indexed); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); @@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { gpu.TickWork(); } -void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - BindComputeTextures(kernel); - - const auto& entries = kernel->GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_memory_entries) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); - - const auto& launch_desc = kepler_compute.launch_description; - glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); - ++num_queued_commands; +void RasterizerOpenGL::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeTextures(kernel); - SetupComputeImages(kernel); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - program_manager.BindCompute(kernel->GetHandle()); - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); -} - -void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, - GLuint base_image, size_t& image_view_index, - size_t& texture_index, size_t& image_index) { - const GLuint* const samplers = sampler_handles.data() + texture_index; - const GLuint* const textures = texture_handles.data() + texture_index; - const GLuint* const images = image_handles.data() + image_index; - - const size_t num_samplers = entries.samplers.size(); - for (const auto& sampler : entries.samplers) { - for (size_t i = 0; i < sampler.size; ++i) { - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); - texture_handles[texture_index++] = handle; - } - } - const size_t num_images = entries.images.size(); - for (size_t unit = 0; unit < num_images; ++unit) { - // TODO: Mark as modified - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); - image_handles[image_index] = handle; - ++image_index; - } - if (num_samplers > 0) { - glBindSamplers(base_texture, static_cast(num_samplers), samplers); - glBindTextures(base_texture, static_cast(num_samplers), textures); - } - if (num_images > 0) { - glBindImageTextures(base_image, static_cast(num_images), images); - } -} - -void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().samplers) { - const auto shader_type = static_cast(stage_index); - for (size_t index = 0; index < entry.size; ++index) { - const auto handle = - GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); - const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : kernel->GetEntries().samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); - const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().images) { - const auto shader_type = static_cast(stage_index); - const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : shader->GetEntries().images) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); - image_view_indices.push_back(handle.image); - } -} - void RasterizerOpenGL::SyncState() { SyncViewport(); SyncRasterizeEnable(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d30ad698f..1f58f8791 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -28,11 +28,9 @@ #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core::Memory { @@ -81,7 +79,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -118,36 +116,11 @@ public: return num_queued_commands > 0; } - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; - void BindComputeTextures(Shader* kernel); - - void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, - size_t& image_view_index, size_t& texture_index, size_t& image_index); - - /// Configures the current textures to use for the draw command. - void SetupDrawTextures(const Shader* shader, size_t stage_index); - - /// Configures the textures used in a compute shader. - void SetupComputeTextures(const Shader* kernel); - - /// Configures images in a graphics shader. - void SetupDrawImages(const Shader* shader, size_t stage_index); - - /// Configures images in a compute shader. - void SetupComputeImages(const Shader* shader); - /// Syncs state to match guest's void SyncState(); @@ -230,8 +203,6 @@ private: /// End a transform feedback void EndTransformFeedback(); - void SetupShaders(bool is_indexed); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -251,8 +222,6 @@ private: AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; - VideoCommon::Shader::AsyncShaders async_shaders; - boost::container::static_vector image_view_indices; std::array image_view_ids; boost::container::static_vector sampler_handles; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5a01c59ec..4dd166156 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -20,307 +20,19 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" namespace OpenGL { -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::GetUniqueIdentifier; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::Registry; -using VideoCommon::Shader::ShaderIR; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; - -namespace { - -constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; - -/// Gets the shader type from a Maxwell program type -constexpr GLenum GetGLShaderType(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_SHADER; - case ShaderType::Geometry: - return GL_GEOMETRY_SHADER; - case ShaderType::Fragment: - return GL_FRAGMENT_SHADER; - case ShaderType::Compute: - return GL_COMPUTE_SHADER; - default: - return GL_NONE; - } -} - -constexpr const char* GetShaderTypeName(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return "VS"; - case ShaderType::TesselationControl: - return "HS"; - case ShaderType::TesselationEval: - return "DS"; - case ShaderType::Geometry: - return "GS"; - case ShaderType::Fragment: - return "FS"; - case ShaderType::Compute: - return "CS"; - } - return "UNK"; -} - -constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { - switch (program_type) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - } - return {}; -} - -constexpr GLenum AssemblyEnum(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_PROGRAM_NV; - case ShaderType::TesselationControl: - return GL_TESS_CONTROL_PROGRAM_NV; - case ShaderType::TesselationEval: - return GL_TESS_EVALUATION_PROGRAM_NV; - case ShaderType::Geometry: - return GL_GEOMETRY_PROGRAM_NV; - case ShaderType::Fragment: - return GL_FRAGMENT_PROGRAM_NV; - case ShaderType::Compute: - return GL_COMPUTE_PROGRAM_NV; - } - return {}; -} - -std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { - return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); -} - -std::shared_ptr MakeRegistry(const ShaderDiskCacheEntry& entry) { - const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; - const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, - entry.graphics_info, entry.compute_info}; - auto registry = std::make_shared(entry.type, info); - for (const auto& [address, value] : entry.keys) { - const auto [buffer, offset] = address; - registry->InsertKey(buffer, offset, value); - } - for (const auto& [offset, sampler] : entry.bound_samplers) { - registry->InsertBoundSampler(offset, sampler); - } - for (const auto& [key, sampler] : entry.bindless_samplers) { - const auto [buffer, offset] = key; - registry->InsertBindlessSampler(buffer, offset, sampler); - } - return registry; -} - -std::unordered_set GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - - std::vector formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - - std::unordered_set supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast(format)); - } - return supported_formats; -} - -} // Anonymous namespace - -ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { - if (device.UseDriverCache()) { - // Ignore hint retrievable if we are using the driver cache - hint_retrievable = false; - } - const std::string shader_id = MakeShaderID(unique_identifier, shader_type); - LOG_INFO(Render_OpenGL, "{}", shader_id); - - auto program = std::make_shared(); - - if (device.UseAssemblyShaders()) { - const std::string arb = - DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); - - GLuint& arb_prog = program->assembly_program.handle; - -// Commented out functions signal OpenGL errors but are compatible with apitrace. -// Use them only to capture and replay on apitrace. -#if 0 - glGenProgramsNV(1, &arb_prog); - glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast(arb.size()), - reinterpret_cast(arb.data())); -#else - glGenProgramsARB(1, &arb_prog); - glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast(arb.size()), arb.data()); -#endif - const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - LOG_CRITICAL(Render_OpenGL, "{}", err); - LOG_INFO(Render_OpenGL, "\n{}", arb); - } - } else { - const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); - OGLShader shader; - shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); - - program->source_program.Create(true, hint_retrievable, shader.handle); - } - - return program; -} - -Shader::Shader(std::shared_ptr registry_, ShaderEntries entries_, - ProgramSharedPtr program_, bool is_built_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, - is_built{is_built_} { - handle = program->assembly_program.handle; - if (handle == 0) { - handle = program->source_program.handle; - } - if (is_built) { - ASSERT(handle != 0); - } -} +Shader::Shader() = default; Shader::~Shader() = default; -GLuint Shader::GetHandle() const { - DEBUG_ASSERT(registry->IsConsistent()); - return handle; -} - -bool Shader::IsBuilt() const { - return is_built; -} - -void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { - program->source_program = std::move(new_program); - handle = program->source_program.handle; - is_built = true; -} - -void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { - program->assembly_program = std::move(new_program); - handle = program->assembly_program.handle; - is_built = true; -} - -std::unique_ptr Shader::CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, - ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { - const auto shader_type = GetShaderType(program_type); - - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared(shader_type, gpu.Maxwell3D()); - if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = - BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr(new Shader(std::move(registry), - MakeEntries(params.device, ir, shader_type), - std::move(program), true)); - } else { - // Required for entries - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - auto entries = MakeEntries(params.device, ir, shader_type); - - async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, - std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, - COMPILER_SETTINGS, *registry, cpu_addr); - - auto program = std::make_shared(); - return std::unique_ptr( - new Shader(std::move(registry), std::move(entries), std::move(program), false)); - } -} - -std::unique_ptr Shader::CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code) { - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared(ShaderType::Compute, params.engine); - const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - const u64 uid = params.unique_identifier; - auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); - - ShaderDiskCacheEntry entry; - entry.type = ShaderType::Compute; - entry.code = std::move(code); - entry.unique_identifier = uid; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.compute_info = registry->GetComputeInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr(new Shader(std::move(registry), - MakeEntries(params.device, ir, ShaderType::Compute), - std::move(program))); -} - -std::unique_ptr Shader::CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader) { - return std::unique_ptr(new Shader( - precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); -} - ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; -void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - disk_cache.BindTitleID(title_id); - const std::optional transferable = disk_cache.LoadTransferable(); - - LOG_INFO(Render_OpenGL, "Total Shader Count: {}", - transferable.has_value() ? transferable->size() : 0); - - if (!transferable) { - return; - } - - std::vector gl_cache; - if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { - // Only load precompiled cache when we are not using assembly shaders - gl_cache = disk_cache.LoadPrecompiled(); - } - const auto supported_formats = GetSupportedFormats(); - - // Track if precompiled cache was altered during loading to know if we have to - // serialize the virtual precompiled cache file back to the hard drive - bool precompiled_cache_altered = false; - - // Inform the frontend about shader build initialization - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); - } - - std::mutex mutex; - std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex - std::atomic_bool gl_cache_failed = false; - - const auto find_precompiled = [&gl_cache](u64 id) { - return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); - }; - - const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, - std::size_t end) { - const auto scope = context->Acquire(); - - for (std::size_t i = begin; i < end; ++i) { - if (stop_loading.stop_requested()) { - return; - } - const auto& entry = (*transferable)[i]; - const u64 uid = entry.unique_identifier; - const auto it = find_precompiled(uid); - const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; - - const bool is_compute = entry.type == ShaderType::Compute; - const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - auto registry = MakeRegistry(entry); - const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); - - ProgramSharedPtr program; - if (precompiled_entry) { - // If the shader is precompiled, attempt to load it with - program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); - if (!program) { - gl_cache_failed = true; - } - } - if (!program) { - // Otherwise compile it from GLSL - program = BuildShader(device, entry.type, uid, ir, *registry, true); - } - - PrecompiledShader shader; - shader.program = std::move(program); - shader.registry = std::move(registry); - shader.entries = MakeEntries(device, ir, entry.type); - - std::scoped_lock lock{mutex}; - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, - transferable->size()); - } - runtime_cache.emplace(entry.unique_identifier, std::move(shader)); - } - }; - - const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; - const std::size_t bucket_size{transferable->size() / num_workers}; - std::vector> contexts(num_workers); - std::vector threads(num_workers); - for (std::size_t i = 0; i < num_workers; ++i) { - const bool is_last_worker = i + 1 == num_workers; - const std::size_t start{bucket_size * i}; - const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; - - // On some platforms the shared context has to be created from the GUI thread - contexts[i] = emu_window.CreateSharedContext(); - threads[i] = std::thread(worker, contexts[i].get(), start, end); - } - for (auto& thread : threads) { - thread.join(); - } - - if (gl_cache_failed) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - precompiled_cache_altered = true; - return; - } - if (stop_loading.stop_requested()) { - return; - } - - if (device.UseAssemblyShaders() || device.UseDriverCache()) { - // Don't store precompiled binaries for assembly shaders or when using the driver cache - return; - } - - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw - // before precompiling them - - for (std::size_t i = 0; i < transferable->size(); ++i) { - const u64 id = (*transferable)[i].unique_identifier; - const auto it = find_precompiled(id); - if (it == gl_cache.end()) { - const GLuint program = runtime_cache.at(id).program->source_program.handle; - disk_cache.SavePrecompiled(id, program); - precompiled_cache_altered = true; - } - } - - if (precompiled_cache_altered) { - disk_cache.SaveVirtualPrecompiledFile(); - } -} - -ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set& supported_formats) { - if (!supported_formats.contains(precompiled_entry.binary_format)) { - LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); - return {}; - } - - auto program = std::make_shared(); - GLuint& handle = program->source_program.handle; - handle = glCreateProgram(); - glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); - glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), - static_cast(precompiled_entry.binary.size())); - - GLint link_status; - glGetProgramiv(handle, GL_LINK_STATUS, &link_status); - if (link_status == GL_FALSE) { - LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); - return {}; - } - - return program; -} - -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders) { - if (!maxwell3d.dirty.flags[Dirty::Shaders]) { - auto* last_shader = last_shaders[static_cast(program)]; - if (last_shader->IsBuilt()) { - return last_shader; - } - } - - const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; - - if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { - auto completed_work = async_shaders.GetCompletedWork(); - for (auto& work : completed_work) { - Shader* shader = TryGet(work.cpu_address); - gpu.ShaderNotify().MarkShaderComplete(); - if (shader == nullptr) { - continue; - } - using namespace VideoCommon::Shader; - if (work.backend == AsyncShaders::Backend::OpenGL) { - shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); - } else if (work.backend == AsyncShaders::Backend::GLASM) { - shader->AsyncGLASMBuilt(std::move(work.program.glasm)); - } - - auto& registry = shader->GetRegistry(); - - ShaderDiskCacheEntry entry; - entry.type = work.shader_type; - entry.code = std::move(work.code); - entry.code_b = std::move(work.code_b); - entry.unique_identifier = work.uid; - entry.bound_buffer = registry.GetBoundBuffer(); - entry.graphics_info = registry.GetGraphicsInfo(); - entry.keys = registry.GetKeys(); - entry.bound_samplers = registry.GetBoundSamplers(); - entry.bindless_samplers = registry.GetBindlessSamplers(); - disk_cache.SaveEntry(std::move(entry)); - } - } - - // Look up shader in the cache based on address - const std::optional cpu_addr{gpu_memory.GpuToCpuAddress(address)}; - if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { - return last_shaders[static_cast(program)] = shader; - } - - const u8* const host_ptr{gpu_memory.GetPointer(address)}; - - // No shader found - create a new one - ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; - ProgramCode code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; - const u8* host_ptr_b = gpu_memory.GetPointer(address_b); - code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); - } - const std::size_t code_size = code.size() * sizeof(u64); - - const u64 unique_identifier = GetUniqueIdentifier( - GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - - const ShaderParameters params{gpu, maxwell3d, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr shader; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), - async_shaders, cpu_addr.value_or(0)); - } else { - shader = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = shader.get(); - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, code_size); - } else { - null_shader = std::move(shader); - } - - return last_shaders[static_cast(program)] = result; -} - -Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { - const std::optional cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; - - if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { - return kernel; - } - - // No kernel found, create a new one - const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; - ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; - const std::size_t code_size{code.size() * sizeof(u64)}; - const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - - const ShaderParameters params{gpu, kepler_compute, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr kernel; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - kernel = Shader::CreateKernelFromMemory(params, std::move(code)); - } else { - kernel = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = kernel.get(); - if (cpu_addr) { - Register(std::move(kernel), *cpu_addr, code_size); - } else { - null_kernel = std::move(kernel); - } - return result; -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b30308b6f..ad3d15a76 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -19,10 +19,6 @@ #include "common/common_types.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -33,10 +29,6 @@ namespace Core::Frontend { class EmuWindow; } -namespace VideoCommon::Shader { -class AsyncShaders; -} - namespace OpenGL { class Device; @@ -44,77 +36,10 @@ class RasterizerOpenGL; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct ProgramHandle { - OGLProgram source_program; - OGLAssemblyProgram assembly_program; -}; -using ProgramSharedPtr = std::shared_ptr; - -struct PrecompiledShader { - ProgramSharedPtr program; - std::shared_ptr registry; - ShaderEntries entries; -}; - -struct ShaderParameters { - Tegra::GPU& gpu; - Tegra::Engines::ConstBufferEngineInterface& engine; - ShaderDiskCacheOpenGL& disk_cache; - const Device& device; - VAddr cpu_addr; - const u8* host_ptr; - u64 unique_identifier; -}; - -ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, - u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - bool hint_retrievable = false); - -class Shader final { +class Shader { public: + explicit Shader(); ~Shader(); - - /// Gets the GL program handle for the shader - GLuint GetHandle() const; - - bool IsBuilt() const; - - /// Gets the shader entries for the shader - const ShaderEntries& GetEntries() const { - return entries; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return *registry; - } - - /// Mark a OpenGL shader as built - void AsyncOpenGLBuilt(OGLProgram new_program); - - /// Mark a GLASM shader as built - void AsyncGLASMBuilt(OGLAssemblyProgram new_program); - - static std::unique_ptr CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode program_code, ProgramCode program_code_b, - VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); - - static std::unique_ptr CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code); - - static std::unique_ptr CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader); - -private: - explicit Shader(std::shared_ptr registry, ShaderEntries entries, - ProgramSharedPtr program, bool is_built_ = true); - - std::shared_ptr registry; - ShaderEntries entries; - ProgramSharedPtr program; - GLuint handle = 0; - bool is_built{}; }; class ShaderCacheOpenGL final : public VideoCommon::ShaderCache { @@ -126,36 +51,13 @@ public: Tegra::MemoryManager& gpu_memory_, const Device& device_); ~ShaderCacheOpenGL() override; - /// Loads disk cache for the current game - void LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback); - - /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders); - - /// Gets a compute kernel in the passed address - Shader* GetComputeKernel(GPUVAddr code_addr); - private: - ProgramSharedPtr GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set& supported_formats); - Core::Frontend::EmuWindow& emu_window; Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; - - ShaderDiskCacheOpenGL disk_cache; - std::unordered_map runtime_cache; - - std::unique_ptr null_shader; - std::unique_ptr null_kernel; - - std::array last_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 9c28498e8..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null @@ -1,2986 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/div_ceil.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::Header; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::IpaMode; -using Tegra::Shader::IpaSampleMode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::TextureType; - -using namespace VideoCommon::Shader; -using namespace std::string_literals; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; - -constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; -constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; - -struct TextureOffset {}; -struct TextureDerivates {}; -using TextureArgument = std::pair; -using TextureIR = std::variant; - -constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast(Maxwell::MaxConstBufferSize) / sizeof(u32); -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); - -constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt -#define ftou floatBitsToUint -#define itof intBitsToFloat -#define utof uintBitsToFloat - -bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ - bvec2 is_nan1 = isnan(pair1); - bvec2 is_nan2 = isnan(pair2); - return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); -}} - -const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); -const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); -)"; - -class ShaderWriter final { -public: - void AddExpression(std::string_view text) { - DEBUG_ASSERT(scope >= 0); - if (!text.empty()) { - AppendIndentation(); - } - shader_source += text; - } - - // Forwards all arguments directly to libfmt. - // Note that all formatting requirements for fmt must be - // obeyed when using this function. (e.g. {{ must be used - // printing the character '{' is desirable. Ditto for }} and '}', - // etc). - template - void AddLine(std::string_view text, Args&&... args) { - AddExpression(fmt::format(fmt::runtime(text), std::forward(args)...)); - AddNewLine(); - } - - void AddNewLine() { - DEBUG_ASSERT(scope >= 0); - shader_source += '\n'; - } - - std::string GenerateTemporary() { - return fmt::format("tmp{}", temporary_index++); - } - - std::string GetResult() { - return std::move(shader_source); - } - - s32 scope = 0; - -private: - void AppendIndentation() { - shader_source.append(static_cast(scope) * 4, ' '); - } - - std::string shader_source; - u32 temporary_index = 1; -}; - -class Expression final { -public: - Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { - ASSERT(type != Type::Void); - } - Expression() : type{Type::Void} {} - - Type GetType() const { - return type; - } - - std::string GetCode() const { - return code; - } - - void CheckVoid() const { - ASSERT(type == Type::Void); - } - - std::string As(Type type_) const { - switch (type_) { - case Type::Bool: - return AsBool(); - case Type::Bool2: - return AsBool2(); - case Type::Float: - return AsFloat(); - case Type::Int: - return AsInt(); - case Type::Uint: - return AsUint(); - case Type::HalfFloat: - return AsHalfFloat(); - default: - UNREACHABLE_MSG("Invalid type"); - return code; - } - } - - std::string AsBool() const { - switch (type) { - case Type::Bool: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsBool2() const { - switch (type) { - case Type::Bool2: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsFloat() const { - switch (type) { - case Type::Float: - return code; - case Type::Uint: - return fmt::format("utof({})", code); - case Type::Int: - return fmt::format("itof({})", code); - case Type::HalfFloat: - return fmt::format("utof(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsInt() const { - switch (type) { - case Type::Float: - return fmt::format("ftoi({})", code); - case Type::Uint: - return fmt::format("int({})", code); - case Type::Int: - return code; - case Type::HalfFloat: - return fmt::format("int(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsUint() const { - switch (type) { - case Type::Float: - return fmt::format("ftou({})", code); - case Type::Uint: - return code; - case Type::Int: - return fmt::format("uint({})", code); - case Type::HalfFloat: - return fmt::format("packHalf2x16({})", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsHalfFloat() const { - switch (type) { - case Type::Float: - return fmt::format("unpackHalf2x16(ftou({}))", code); - case Type::Uint: - return fmt::format("unpackHalf2x16({})", code); - case Type::Int: - return fmt::format("unpackHalf2x16(int({}))", code); - case Type::HalfFloat: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - -private: - std::string code; - Type type{}; -}; - -const char* GetTypeString(Type type) { - switch (type) { - case Type::Bool: - return "bool"; - case Type::Bool2: - return "bvec2"; - case Type::Float: - return "float"; - case Type::Int: - return "int"; - case Type::Uint: - return "uint"; - case Type::HalfFloat: - return "vec2"; - default: - UNREACHABLE_MSG("Invalid type"); - return ""; - } -} - -const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "Buffer"; - case Tegra::Shader::ImageType::Texture1DArray: - return "1DArray"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "2DArray"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - default: - UNREACHABLE(); - return "1D"; - } -} - -/// Describes primitive behavior on geometry shaders -std::pair GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { - switch (topology) { - case Maxwell::PrimitiveTopology::Points: - return {"points", 1}; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineStrip: - return {"lines", 2}; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return {"lines_adjacency", 4}; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return {"triangles", 3}; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return {"triangles_adjacency", 6}; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return {"points", 1}; - } -} - -/// Generates code to use for a swizzle operation. -constexpr const char* GetSwizzle(std::size_t element) { - constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; - return swizzle.at(element); -} - -constexpr const char* GetColorSwizzle(std::size_t element) { - constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; - return swizzle.at(element); -} - -/// Translate topology -std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "points"; - case Tegra::Shader::OutputTopology::LineStrip: - return "line_strip"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "triangle_strip"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (const auto arithmetic = std::get_if(&meta)) { - return arithmetic->precise; - } - return false; -} - -bool IsPrecise(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - return IsPrecise(*operation); - } - return false; -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -constexpr bool IsLegacyTexCoord(Attribute::Index index) { - return static_cast(index) >= static_cast(Attribute::Index::TexCoord_0) && - static_cast(index) <= static_cast(Attribute::Index::TexCoord_7); -} - -constexpr Attribute::Index ToGenericAttribute(u64 value) { - return static_cast(value + static_cast(Attribute::Index::Attribute_0)); -} - -constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { - return static_cast(index) - static_cast(Attribute::Index::TexCoord_0); -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast(index) - static_cast(Attribute::Index::Attribute_0); -} - -constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "ssy"; - case MetaStackClass::Pbk: - return "pbk"; - } - return {}; -} - -std::string FlowStackName(MetaStackClass stack) { - return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); -} - -std::string FlowStackTopName(MetaStackClass stack) { - return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); -} - -struct GenericVaryingDescription { - std::string name; - u8 first_element = 0; - bool is_scalar = false; -}; - -class GLSLDecompiler final { -public: - explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier_, - std::string_view suffix_) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, - identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { - if (stage != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); - } - } - - void Decompile() { - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareImages(); - DeclareSamplers(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareLocalMemory(); - DeclareRegisters(); - DeclarePredicates(); - DeclareInternalFlags(); - DeclareCustomVariables(); - DeclarePhysicalAttributeReader(); - - code.AddLine("void main() {{"); - ++code.scope; - - if (stage == ShaderType::Vertex) { - code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - } - - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - - --code.scope; - code.AddLine("}}"); - } - - std::string GetResult() { - return code.GetResult(); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - void DecompileBranchMode() { - // VM's program counter - const auto first_address = ir.GetBasicBlocks().begin()->first; - code.AddLine("uint jmp_to = {}U;", first_address); - - // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems - // unlikely that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { - code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); - code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); - } - } - - code.AddLine("while (true) {{"); - ++code.scope; - - code.AddLine("switch (jmp_to) {{"); - - for (const auto& pair : ir.GetBasicBlocks()) { - const auto& [address, bb] = pair; - code.AddLine("case 0x{:X}U: {{", address); - ++code.scope; - - VisitBlock(bb); - - --code.scope; - code.AddLine("}}"); - } - - code.AddLine("default: return;"); - code.AddLine("}}"); - - --code.scope; - code.AddLine("}}"); - } - - void DecompileAST(); - - void DeclareHeader() { - if (!identifier.empty()) { - code.AddLine("// {}", identifier); - } - const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); - code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core"); - code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); - if (device.HasShaderBallot()) { - code.AddLine("#extension GL_ARB_shader_ballot : require"); - } - if (device.HasVertexViewportLayer()) { - code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); - } - if (device.HasImageLoadFormatted()) { - code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); - } - if (device.HasTextureShadowLod()) { - code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); - } - if (device.HasWarpIntrinsics()) { - code.AddLine("#extension GL_NV_gpu_shader5 : require"); - code.AddLine("#extension GL_NV_shader_thread_group : require"); - code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); - } - // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 - // operations) on places where we don't want to. - // Thanks to Ryujinx for finding this workaround. - code.AddLine("#pragma optionNV(fastmath off)"); - - code.AddNewLine(); - - code.AddLine(COMMON_DECLARATIONS); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - - DeclareVertexRedeclarations(); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - - const auto& info = registry.GetGraphicsInfo(); - const auto input_topology = info.primitive_topology; - const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); - max_input_vertices = max_vertices; - code.AddLine("layout ({}) in;", glsl_topology); - - const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_output_vertices = header.common4.max_output_vertices.Value(); - code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); - code.AddNewLine(); - - code.AddLine("in gl_PerVertex {{"); - ++code.scope; - code.AddLine("vec4 gl_Position;"); - --code.scope; - code.AddLine("}} gl_in[];"); - - DeclareVertexRedeclarations(); - } - - void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - if (ir.UsesLegacyVaryings()) { - code.AddLine("in gl_PerFragment {{"); - ++code.scope; - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_Color;"); - code.AddLine("vec4 gl_SecondaryColor;"); - --code.scope; - code.AddLine("}};"); - } - - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); - } - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const auto& info = registry.GetComputeInfo(); - if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (size > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size, limit); - size = limit; - } - - code.AddLine("shared uint smem[{}];", size / 4); - code.AddNewLine(); - } - code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", - info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); - code.AddNewLine(); - } - - void DeclareVertexRedeclarations() { - code.AddLine("out gl_PerVertex {{"); - ++code.scope; - - auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); - if (!pos_xfb.empty()) { - pos_xfb = fmt::format("layout ({}) ", pos_xfb); - } - const char* pos_type = - FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); - code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); - - for (const auto attribute : ir.GetOutputAttributes()) { - if (attribute == Attribute::Index::ClipDistances0123 || - attribute == Attribute::Index::ClipDistances4567) { - code.AddLine("float gl_ClipDistance[];"); - break; - } - } - - if (stage != ShaderType::Geometry && - (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { - if (ir.UsesLayer()) { - code.AddLine("int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("int gl_ViewportIndex;"); - } - } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && - !device.HasVertexViewportLayer()) { - LOG_ERROR( - Render_OpenGL, - "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); - } - - if (ir.UsesPointSize()) { - code.AddLine("float gl_PointSize;"); - } - - if (ir.UsesLegacyVaryings()) { - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_FrontColor;"); - code.AddLine("vec4 gl_FrontSecondaryColor;"); - code.AddLine("vec4 gl_BackColor;"); - code.AddLine("vec4 gl_BackSecondaryColor;"); - } - - --code.scope; - code.AddLine("}};"); - code.AddNewLine(); - - if (stage == ShaderType::Geometry) { - if (ir.UsesLayer()) { - code.AddLine("out int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("out int gl_ViewportIndex;"); - } - } - code.AddNewLine(); - } - - void DeclareRegisters() { - const auto& registers = ir.GetRegisters(); - for (const u32 gpr : registers) { - code.AddLine("float {} = 0.0f;", GetRegister(gpr)); - } - if (!registers.empty()) { - code.AddNewLine(); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); - } - if (num_custom_variables > 0) { - code.AddNewLine(); - } - } - - void DeclarePredicates() { - const auto& predicates = ir.GetPredicates(); - for (const auto pred : predicates) { - code.AddLine("bool {} = false;", GetPredicate(pred)); - } - if (!predicates.empty()) { - code.AddNewLine(); - } - } - - void DeclareLocalMemory() { - u64 local_memory_size = 0; - if (stage == ShaderType::Compute) { - local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - local_memory_size = header.GetLocalMemorySize(); - } - if (local_memory_size == 0) { - return; - } - const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; - code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); - code.AddNewLine(); - } - - void DeclareInternalFlags() { - for (u32 flag = 0; flag < static_cast(InternalFlag::Amount); flag++) { - const auto flag_code = static_cast(flag); - code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); - } - code.AddNewLine(); - } - - const char* GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return "smooth"; - case PixelImap::Constant: - return "flat"; - case PixelImap::ScreenLinear: - return "noperspective"; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; - } - - void DeclareInputAttributes() { - if (ir.HasPhysicalAttributes()) { - const u32 num_inputs{GetNumPhysicalInputAttributes()}; - for (u32 i = 0; i < num_inputs; ++i) { - DeclareInputAttribute(ToGenericAttribute(i), true); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetInputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareInputAttribute(index, false); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 location{GetGenericAttributeIndex(index)}; - - std::string name{GetGenericInputAttribute(index)}; - if (stage == ShaderType::Geometry) { - name = "gs_" + name + "[]"; - } - - std::string suffix_; - if (stage == ShaderType::Fragment) { - const auto input_mode{header.ps.GetPixelImap(location)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix_ = GetInputFlags(input_mode); - } - - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); - } - - void DeclareOutputAttributes() { - if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { - for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { - DeclareOutputAttribute(ToGenericAttribute(i)); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetOutputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareOutputAttribute(index); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - std::optional GetNumComponents(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return std::nullopt; - } - return it->second.components; - } - - std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - - const VaryingTFB& tfb = it->second; - return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, - tfb.offset, tfb.stride); - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - u8 element = 0; - while (element < 4) { - auto xfb = GetTransformFeedbackDecoration(index, element); - if (!xfb.empty()) { - xfb = fmt::format(", {}", xfb); - } - const std::size_t remainder = 4 - element; - const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); - const char* const type = FLOAT_TYPES.at(num_components - 1); - - const u32 location = GetGenericAttributeIndex(index); - - GenericVaryingDescription description; - description.first_element = static_cast(element); - description.is_scalar = num_components == 1; - description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); - if (element != 0 || num_components != 4) { - const std::string_view name_swizzle = swizzle.substr(element, num_components); - description.name = fmt::format("{}_{}", description.name, name_swizzle); - } - for (std::size_t i = 0; i < num_components; ++i) { - const u8 offset = static_cast(location * 4 + element + i); - varying_description.insert({offset, description}); - } - - code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, - xfb, type, description.name); - - element = static_cast(static_cast(element) + num_components); - } - } - - void DeclareConstantBuffers() { - u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, info] : ir.GetConstantBuffers()) { - const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); - const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; - code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, - GetConstBufferBlock(index)); - code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareGlobalMemory() { - u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - // Since we don't know how the shader will use the shader, hint the driver to disable as - // much optimizations as possible - std::string qualifier = "coherent volatile"; - if (usage.is_read && !usage.is_written) { - qualifier += " readonly"; - } else if (usage.is_written && !usage.is_read) { - qualifier += " writeonly"; - } - - code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, - GetGlobalMemoryBlock(base)); - code.AddLine(" uint {}[];", GetGlobalMemory(base)); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareSamplers() { - u32 binding = device.GetBaseBindings(stage).sampler; - for (const auto& sampler : ir.GetSamplers()) { - const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding); - binding += sampler.is_indexed ? sampler.size : 1; - - std::string sampler_type = [&]() { - if (sampler.is_buffer) { - return "samplerBuffer"; - } - switch (sampler.type) { - case TextureType::Texture1D: - return "sampler1D"; - case TextureType::Texture2D: - return "sampler2D"; - case TextureType::Texture3D: - return "sampler3D"; - case TextureType::TextureCube: - return "samplerCube"; - default: - UNREACHABLE(); - return "sampler2D"; - } - }(); - if (sampler.is_array) { - sampler_type += "Array"; - } - if (sampler.is_shadow) { - sampler_type += "Shadow"; - } - - if (!sampler.is_indexed) { - code.AddLine("{} {} {};", description, sampler_type, name); - } else { - code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size); - } - } - if (!ir.GetSamplers().empty()) { - code.AddNewLine(); - } - } - - void DeclarePhysicalAttributeReader() { - if (!ir.HasPhysicalAttributes()) { - return; - } - code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); - ++code.scope; - code.AddLine("switch (physical_address) {{"); - - // Just declare generic attributes for now. - const auto num_attributes{static_cast(GetNumPhysicalInputAttributes())}; - for (u32 index = 0; index < num_attributes; ++index) { - const auto attribute{ToGenericAttribute(index)}; - for (u32 element = 0; element < 4; ++element) { - constexpr u32 generic_base = 0x80; - constexpr u32 generic_stride = 16; - constexpr u32 element_stride = 4; - const u32 address{generic_base + index * generic_stride + element * element_stride}; - - const bool declared = stage != ShaderType::Fragment || - header.ps.GetPixelImap(index) != PixelImap::Unused; - const std::string value = - declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; - code.AddLine("case 0x{:X}U: return {};", address, value); - } - } - - code.AddLine("default: return 0;"); - - code.AddLine("}}"); - --code.scope; - code.AddLine("}}"); - code.AddNewLine(); - } - - void DeclareImages() { - u32 binding = device.GetBaseBindings(stage).image; - for (const auto& image : ir.GetImages()) { - std::string qualifier = "coherent volatile"; - if (image.is_read && !image.is_written) { - qualifier += " readonly"; - } else if (image.is_written && !image.is_read) { - qualifier += " writeonly"; - } - - const char* format = image.is_atomic ? "r32ui, " : ""; - const char* type_declaration = GetImageTypeDeclaration(image.type); - code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, - qualifier, type_declaration, GetImage(image)); - } - if (!ir.GetImages().empty()) { - code.AddNewLine(); - } - } - - void VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node).CheckVoid(); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - const auto operation_index = static_cast(operation->GetCode()); - if (operation_index >= operation_decompilers.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); - return {}; - } - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", operation_index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {"0U", Type::Uint}; - } - return {GetRegister(index), Type::Float}; - } - - if (const auto cv = std::get_if(&*node)) { - const u32 index = cv->GetIndex(); - return {GetCustomVariable(index), Type::Float}; - } - - if (const auto immediate = std::get_if(&*node)) { - const u32 value = immediate->GetValue(); - if (value < 10) { - // For eyecandy avoid using hex numbers on single digits - return {fmt::format("{}U", immediate->GetValue()), Type::Uint}; - } - return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if(&*node)) { - const auto value = [&]() -> std::string { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return "true"; - case Tegra::Shader::Pred::NeverExecute: - return "false"; - default: - return GetPredicate(index); - } - }(); - if (predicate->IsNegated()) { - return {fmt::format("!({})", value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if(&*node)) { - UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, - "Physical attributes in geometry shaders are not implemented"); - if (abuf->IsPhysicalBuffer()) { - return {fmt::format("ReadPhysicalAttribute({})", - Visit(abuf->GetPhysicalAddress()).AsUint()), - Type::Float}; - } - return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); - } - - if (const auto cbuf = std::get_if(&*node)) { - const Node offset = cbuf->GetOffset(); - - if (const auto immediate = std::get_if(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4), - Type::Uint}; - } - - // Indirect access - const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); - - if (!device.HasComponentIndexingBug()) { - return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset), - Type::Uint}; - } - - // AMD's proprietary GLSL compiler emits ill code for variable component access. - // To bypass this driver bug generate 4 ifs, one per each component. - const std::string pack = code.GenerateTemporary(); - code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), - final_offset); - - const std::string result = code.GenerateTemporary(); - code.AddLine("uint {};", result); - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, - GetSwizzle(swizzle)); - } - return {result, Type::Uint}; - } - - if (const auto gmem = std::get_if(&*node)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } - - if (const auto lmem = std::get_if(&*node)) { - return { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } - - if (const auto smem = std::get_if(&*node)) { - return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } - - if (const auto internal_flag = std::get_if(&*node)) { - return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - // It's invalid to call conditional on nested nodes, use an operation instead - code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); - ++code.scope; - - VisitBlock(conditional->GetCode()); - - --code.scope; - code.AddLine("}}"); - return {}; - } - - if (const auto comment = std::get_if(&*node)) { - code.AddLine("// " + comment->GetText()); - return {}; - } - - UNREACHABLE(); - return {}; - } - - Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { - const auto GeometryPass = [&](std::string_view name) { - if (stage == ShaderType::Geometry && buffer) { - // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games - // set an 0x80000000 index for those and the shader fails to build. Find out why - // this happens and what's its intent. - return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), - max_input_vertices.value()); - } - return std::string(name); - }; - - switch (attribute) { - case Attribute::Index::Position: - switch (stage) { - case ShaderType::Geometry: - return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), - GetSwizzle(element)), - Type::Float}; - case ShaderType::Fragment: - return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; - default: - UNREACHABLE(); - return {"0", Type::Int}; - } - case Attribute::Index::FrontColor: - return {"gl_Color"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::FrontSecondaryColor: - return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return {"gl_PointCoord.x", Type::Float}; - case 1: - return {"gl_PointCoord.y", Type::Float}; - case 2: - case 3: - return {"0.0f", Type::Float}; - } - UNREACHABLE(); - return {"0", Type::Int}; - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - // Config pack's first value is instance_id. - return {"gl_InstanceID", Type::Int}; - case 3: - return {"gl_VertexID", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {"0", Type::Int}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - switch (element) { - case 3: - return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {"0", Type::Int}; - default: - if (IsGenericAttribute(attribute)) { - return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), - Type::Float}; - } - if (IsLegacyTexCoord(attribute)) { - UNIMPLEMENTED_IF(stage == ShaderType::Geometry); - return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}; - } - break; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {"0", Type::Int}; - } - - Expression ApplyPrecise(Operation operation, std::string value, Type type) { - if (!IsPrecise(operation)) { - return {std::move(value), type}; - } - // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to - // be found in fragment shaders, so we disable precise there. There are vertex shaders that - // also fail to build but nobody seems to care about those. - // Note: Only bugged drivers will skip precise. - const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; - - std::string temporary = code.GenerateTemporary(); - code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), - temporary, value); - return {std::move(temporary), type}; - } - - Expression VisitOperand(Operation operation, std::size_t operand_index) { - const auto& operand = operation[operand_index]; - const bool parent_precise = IsPrecise(operation); - const bool child_precise = IsPrecise(operand); - const bool child_trivial = !std::holds_alternative(*operand); - if (!parent_precise || child_precise || child_trivial) { - return Visit(operand); - } - - Expression value = Visit(operand); - std::string temporary = code.GenerateTemporary(); - code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); - return {std::move(temporary), value.GetType()}; - } - - std::optional GetOutputAttribute(const AbufNode* abuf) { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex()) { - case Attribute::Index::Position: - return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return std::nullopt; - case 1: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_Layer", Type::Int}}; - case 2: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_ViewportIndex", Type::Int}}; - case 3: - return {{"gl_PointSize", Type::Float}}; - } - return std::nullopt; - case Attribute::Index::FrontColor: - return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::FrontSecondaryColor: - return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackColor: - return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackSecondaryColor: - return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::ClipDistances0123: - return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; - case Attribute::Index::ClipDistances4567: - return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; - default: - if (IsGenericAttribute(attribute)) { - return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; - } - if (IsLegacyTexCoord(attribute)) { - return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); - return std::nullopt; - } - } - - Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, - Type type_a) { - std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b, Type type_c) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b, Type type_c, Type type_d) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - const std::string op_d = VisitOperand(operation, 3).As(type_d); - std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - std::string GenerateTexture(Operation operation, const std::string& function_suffix, - const std::vector& extras, bool separate_dc = false) { - constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; - - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - const std::size_t count = operation.GetOperandsCount(); - const bool has_array = meta->sampler.is_array; - const bool has_shadow = meta->sampler.is_shadow; - const bool workaround_lod_array_shadow_as_grad = - !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube); - - std::string expr = "texture"; - - if (workaround_lod_array_shadow_as_grad) { - expr += "Grad"; - } else { - expr += function_suffix; - } - - if (!meta->aoffi.empty()) { - expr += "Offset"; - } else if (!meta->ptp.empty()) { - expr += "Offsets"; - } - if (!meta->sampler.is_indexed) { - expr += '(' + GetSampler(meta->sampler) + ", "; - } else { - expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; - } - expr += coord_constructors.at(count + (has_array ? 1 : 0) + - (has_shadow && !separate_dc ? 1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - expr += Visit(operation[i]).AsFloat(); - - const std::size_t next = i + 1; - if (next < count) - expr += ", "; - } - if (has_array) { - expr += ", float(" + Visit(meta->array).AsInt() + ')'; - } - if (has_shadow) { - if (separate_dc) { - expr += "), " + Visit(meta->depth_compare).AsFloat(); - } else { - expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; - } - } else { - expr += ')'; - } - - if (workaround_lod_array_shadow_as_grad) { - switch (meta->sampler.type) { - case TextureType::Texture2D: - return expr + ", vec2(0.0), vec2(0.0))"; - case TextureType::TextureCube: - return expr + ", vec3(0.0), vec3(0.0))"; - default: - UNREACHABLE(); - break; - } - } - - for (const auto& variant : extras) { - if (const auto argument = std::get_if(&variant)) { - expr += GenerateTextureArgument(*argument); - } else if (std::holds_alternative(variant)) { - if (!meta->aoffi.empty()) { - expr += GenerateTextureAoffi(meta->aoffi); - } else if (!meta->ptp.empty()) { - expr += GenerateTexturePtp(meta->ptp); - } - } else if (std::holds_alternative(variant)) { - expr += GenerateTextureDerivates(meta->derivates); - } else { - UNREACHABLE(); - } - } - - return expr + ')'; - } - - std::string GenerateTextureArgument(const TextureArgument& argument) { - const auto& [type, operand] = argument; - if (operand == nullptr) { - return {}; - } - - std::string expr = ", "; - switch (type) { - case Type::Int: - if (const auto immediate = std::get_if(&*operand)) { - // Inline the string as an immediate integer in GLSL (some extra arguments are - // required to be constant) - expr += std::to_string(static_cast(immediate->GetValue())); - } else { - expr += Visit(operand).AsInt(); - } - break; - case Type::Float: - expr += Visit(operand).AsFloat(); - break; - default: { - const auto type_int = static_cast(type); - UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); - expr += '0'; - break; - } - } - return expr; - } - - std::string ReadTextureOffset(const Node& value) { - if (const auto immediate = std::get_if(&*value)) { - // Inline the string as an immediate integer in GLSL (AOFFI arguments are required - // to be constant by the standard). - return std::to_string(static_cast(immediate->GetValue())); - } else if (device.HasVariableAoffi()) { - // Avoid using variable AOFFI on unsupported devices. - return Visit(value).AsInt(); - } else { - // Insert 0 on devices not supporting variable AOFFI. - return "0"; - } - } - - std::string GenerateTextureAoffi(const std::vector& aoffi) { - if (aoffi.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; - std::string expr = ", "; - expr += coord_constructors.at(aoffi.size() - 1); - expr += '('; - - for (std::size_t index = 0; index < aoffi.size(); ++index) { - expr += ReadTextureOffset(aoffi.at(index)); - if (index + 1 < aoffi.size()) { - expr += ", "; - } - } - expr += ')'; - - return expr; - } - - std::string GenerateTexturePtp(const std::vector& ptp) { - static constexpr std::size_t num_vectors = 4; - ASSERT(ptp.size() == num_vectors * 2); - - std::string expr = ", ivec2[]("; - for (std::size_t vector = 0; vector < num_vectors; ++vector) { - const bool has_next = vector + 1 < num_vectors; - expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), - ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : ""); - } - expr += ')'; - return expr; - } - - std::string GenerateTextureDerivates(const std::vector& derivates) { - if (derivates.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"float", "vec2", "vec3"}; - std::string expr = ", "; - const std::size_t components = derivates.size() / 2; - std::string dx = coord_constructors.at(components - 1); - std::string dy = coord_constructors.at(components - 1); - dx += '('; - dy += '('; - - for (std::size_t index = 0; index < components; ++index) { - const auto& operand_x{derivates.at(index * 2)}; - const auto& operand_y{derivates.at(index * 2 + 1)}; - dx += Visit(operand_x).AsFloat(); - dy += Visit(operand_y).AsFloat(); - - if (index + 1 < components) { - dx += ", "; - dy += ", "; - } - } - dx += ')'; - dy += ')'; - expr += dx + ", " + dy; - - return expr; - } - - std::string BuildIntegerCoordinates(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; - const std::size_t coords_count{operation.GetOperandsCount()}; - std::string expr = constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - std::string BuildImageValues(Operation operation) { - constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto& meta{std::get(operation.GetMeta())}; - - const std::size_t values_count{meta.values.size()}; - std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsUint(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit the source - // as it might have side effects. - code.AddLine("{};", Visit(src).GetCode()); - return {}; - } - target = {GetRegister(gpr->GetIndex()), Type::Float}; - } else if (const auto abuf = std::get_if(&*dest)) { - UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - auto output = GetOutputAttribute(abuf); - if (!output) { - return {}; - } - target = std::move(*output); - } else if (const auto lmem = std::get_if(&*dest)) { - target = { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } else if (const auto smem = std::get_if(&*dest)) { - ASSERT(stage == ShaderType::Compute); - target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } else if (const auto gmem = std::get_if(&*dest)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } else if (const auto cv = std::get_if(&*dest)) { - target = {GetCustomVariable(cv->GetIndex()), Type::Float}; - } else { - UNREACHABLE_MSG("Assign called without a proper target"); - } - - code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); - return {}; - } - - template - Expression Add(Operation operation) { - return GenerateBinaryInfix(operation, "+", type, type, type); - } - - template - Expression Mul(Operation operation) { - return GenerateBinaryInfix(operation, "*", type, type, type); - } - - template - Expression Div(Operation operation) { - return GenerateBinaryInfix(operation, "/", type, type, type); - } - - template - Expression Fma(Operation operation) { - return GenerateTernary(operation, "fma", type, type, type, type); - } - - template - Expression Negate(Operation operation) { - return GenerateUnary(operation, "-", type, type); - } - - template - Expression Absolute(Operation operation) { - return GenerateUnary(operation, "abs", type, type); - } - - Expression FClamp(Operation operation) { - return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, - Type::Float); - } - - Expression FCastHalf0(Operation operation) { - return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression FCastHalf1(Operation operation) { - return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - template - Expression Min(Operation operation) { - return GenerateBinaryCall(operation, "min", type, type, type); - } - - template - Expression Max(Operation operation) { - return GenerateBinaryCall(operation, "max", type, type, type); - } - - Expression Select(Operation operation) { - const std::string condition = Visit(operation[0]).AsBool(); - const std::string true_case = Visit(operation[1]).AsUint(); - const std::string false_case = Visit(operation[2]).AsUint(); - std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case); - - return ApplyPrecise(operation, std::move(op_str), Type::Uint); - } - - Expression FCos(Operation operation) { - return GenerateUnary(operation, "cos", Type::Float, Type::Float); - } - - Expression FSin(Operation operation) { - return GenerateUnary(operation, "sin", Type::Float, Type::Float); - } - - Expression FExp2(Operation operation) { - return GenerateUnary(operation, "exp2", Type::Float, Type::Float); - } - - Expression FLog2(Operation operation) { - return GenerateUnary(operation, "log2", Type::Float, Type::Float); - } - - Expression FInverseSqrt(Operation operation) { - return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); - } - - Expression FSqrt(Operation operation) { - return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); - } - - Expression FRoundEven(Operation operation) { - return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); - } - - Expression FFloor(Operation operation) { - return GenerateUnary(operation, "floor", Type::Float, Type::Float); - } - - Expression FCeil(Operation operation) { - return GenerateUnary(operation, "ceil", Type::Float, Type::Float); - } - - Expression FTrunc(Operation operation) { - return GenerateUnary(operation, "trunc", Type::Float, Type::Float); - } - - template - Expression FCastInteger(Operation operation) { - return GenerateUnary(operation, "float", Type::Float, type); - } - - Expression FSwizzleAdd(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsFloat(); - const std::string op_b = VisitOperand(operation, 1).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {fmt::format("{} + {}", op_a, op_b), Type::Float}; - } - - const std::string instr_mask = VisitOperand(operation, 2).AsUint(); - const std::string mask = code.GenerateTemporary(); - code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, - instr_mask); - - const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); - const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); - return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), - Type::Float}; - } - - Expression ICastFloat(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Float); - } - - Expression ICastUnsigned(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Uint); - } - - template - Expression LogicalShiftLeft(Operation operation) { - return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); - } - - Expression ILogicalShiftRight(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - const std::string op_b = VisitOperand(operation, 1).AsUint(); - std::string op_str = fmt::format("int({} >> {})", op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), Type::Int); - } - - Expression IArithmeticShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); - } - - template - Expression BitwiseAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&", type, type, type); - } - - template - Expression BitwiseOr(Operation operation) { - return GenerateBinaryInfix(operation, "|", type, type, type); - } - - template - Expression BitwiseXor(Operation operation) { - return GenerateBinaryInfix(operation, "^", type, type, type); - } - - template - Expression BitwiseNot(Operation operation) { - return GenerateUnary(operation, "~", type, type); - } - - Expression UCastFloat(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Float); - } - - Expression UCastSigned(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Int); - } - - Expression UShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); - } - - template - Expression BitfieldInsert(Operation operation) { - return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, - Type::Int); - } - - template - Expression BitfieldExtract(Operation operation) { - return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); - } - - template - Expression BitCount(Operation operation) { - return GenerateUnary(operation, "bitCount", type, type); - } - - template - Expression BitMSB(Operation operation) { - return GenerateUnary(operation, "findMSB", type, type); - } - - Expression HNegate(Operation operation) { - const auto GetNegate = [&](std::size_t index) { - return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; - }; - return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), - GetNegate(1), GetNegate(2)), - Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsHalfFloat(); - const std::string min = VisitOperand(operation, 1).AsFloat(); - const std::string max = VisitOperand(operation, 2).AsFloat(); - std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); - - return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); - } - - Expression HCastFloat(Operation operation) { - return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()), - Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = VisitOperand(operation, 0); - switch (std::get(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: - return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H0_H0: - return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H1_H1: - return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression HMergeF32(Operation operation) { - return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression HMergeH0(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), - Type::HalfFloat}; - } - - Expression HMergeH1(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), - Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::HalfFloat}; - } - - template - Expression Comparison(Operation operation) { - static_assert(!unordered || type == Type::Float); - - Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); - - if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { - // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's - // and Nvidia's proprietary stacks. Manually force an ordered comparison. - return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - if constexpr (!unordered) { - return expr; - } - // Unordered comparisons are always true for NaN operands. - return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FOrdered(Operation operation) { - return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FUnordered(Operation operation) { - return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression LogicalAddCarry(Operation operation) { - const std::string carry = code.GenerateTemporary(); - code.AddLine("uint {};", carry); - code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), - VisitOperand(operation, 1).AsUint(), carry); - return {fmt::format("({} != 0)", carry), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = GetPredicate(index); - } else if (const auto flag = std::get_if(&*dest)) { - target = GetInternalFlag(flag->GetFlag()); - } - - code.AddLine("{} = {};", target, Visit(src).AsBool()); - return {}; - } - - Expression LogicalAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalOr(Operation operation) { - return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalXor(Operation operation) { - return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalNegate(Operation operation) { - return GenerateUnary(operation, "!", Type::Bool, Type::Bool); - } - - Expression LogicalPick2(Operation operation) { - return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), - VisitOperand(operation, 1).AsUint()), - Type::Bool}; - } - - Expression LogicalAnd2(Operation operation) { - return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); - } - - template - Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { - Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, - Type::HalfFloat, Type::HalfFloat); - if constexpr (!with_nan) { - return comparison; - } - return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), - VisitOperand(operation, 0).AsHalfFloat(), - VisitOperand(operation, 1).AsHalfFloat()), - Type::Bool2}; - } - - template - Expression Logical2HLessThan(Operation operation) { - return GenerateHalfComparison(operation, "lessThan"); - } - - template - Expression Logical2HEqual(Operation operation) { - return GenerateHalfComparison(operation, "equal"); - } - - template - Expression Logical2HLessEqual(Operation operation) { - return GenerateHalfComparison(operation, "lessThanEqual"); - } - - template - Expression Logical2HGreaterThan(Operation operation) { - return GenerateHalfComparison(operation, "greaterThan"); - } - - template - Expression Logical2HNotEqual(Operation operation) { - return GenerateHalfComparison(operation, "notEqual"); - } - - template - Expression Logical2HGreaterEqual(Operation operation) { - return GenerateHalfComparison(operation, "greaterThanEqual"); - } - - Expression Texture(Operation operation) { - const auto meta = std::get(operation.GetMeta()); - const bool separate_dc = meta.sampler.type == TextureType::TextureCube && - meta.sampler.is_array && meta.sampler.is_shadow; - // TODO: Replace this with an array and make GenerateTexture use C++20 std::span - const std::vector extras{ - TextureOffset{}, - TextureArgument{Type::Float, meta.bias}, - }; - std::string expr = GenerateTexture(operation, "", extras, separate_dc); - if (meta.sampler.is_shadow) { - expr = fmt::format("vec4({})", expr); - } - return {expr + GetSwizzle(meta.element), Type::Float}; - } - - Expression TextureLod(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - std::string expr{}; - - if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube)) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); - expr = GenerateTexture(operation, "Lod", {}); - } else { - expr = GenerateTexture(operation, "Lod", - {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); - } - - if (meta->sampler.is_shadow) { - expr = "vec4(" + expr + ')'; - } - return {expr + GetSwizzle(meta->element), Type::Float}; - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int; - const bool separate_dc = meta.sampler.is_shadow; - - std::vector ir_; - if (meta.sampler.is_shadow) { - ir_ = {TextureOffset{}}; - } else { - ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; - } - return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), - Type::Float}; - } - - Expression TextureQueryDimensions(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - const std::string sampler = GetSampler(meta->sampler); - const std::string lod = VisitOperand(operation, 0).AsInt(); - - switch (meta->element) { - case 0: - case 1: - return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), - Type::Int}; - case 3: - return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - if (meta->element < 2) { - return {fmt::format("int(({} * vec2(256)){})", - GenerateTexture(operation, "QueryLod", {}), - GetSwizzle(meta->element)), - Type::Int}; - } - return {"0", Type::Int}; - } - - Expression TexelFetch(Operation operation) { - constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - UNIMPLEMENTED_IF(meta->sampler.is_array); - const std::size_t count = operation.GetOperandsCount(); - - std::string expr = "texelFetch("; - expr += GetSampler(meta->sampler); - expr += ", "; - - expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - if (i > 0) { - expr += ", "; - } - expr += VisitOperand(operation, i).AsInt(); - } - if (meta->array) { - expr += ", "; - expr += Visit(meta->array).AsInt(); - } - expr += ')'; - - if (meta->lod && !meta->sampler.is_buffer) { - expr += ", "; - expr += Visit(meta->lod).AsInt(); - } - expr += ')'; - expr += GetSwizzle(meta->element); - - return {std::move(expr), Type::Float}; - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - std::string expr = - GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); - return {std::move(expr) + GetSwizzle(meta.element), Type::Float}; - } - - Expression ImageLoad(Operation operation) { - if (!device.HasImageLoadFormatted()) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); - return {"0", Type::Int}; - } - - const auto& meta{std::get(operation.GetMeta())}; - return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), - BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), - Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), - BuildIntegerCoordinates(operation), BuildImageValues(operation)); - return {}; - } - - template - Expression AtomicImage(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), - BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()), - Type::Uint}; - } - - template - Expression Atomic(Operation operation) { - if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { - UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); - return {}; - } - return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), - Visit(operation[1]).AsUint()), - Type::Uint}; - } - - template - Expression Reduce(Operation operation) { - code.AddLine("{};", Atomic(operation).GetCode()); - return {}; - } - - Expression Branch(Operation operation) { - const auto target = std::get_if(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); - code.AddLine("break;"); - return {}; - } - - Expression BranchIndirect(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - - code.AddLine("jmp_to = {};", op_a); - code.AddLine("break;"); - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const auto target = std::get_if(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), - target->GetValue()); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); - code.AddLine("break;"); - return {}; - } - - void PreExit() { - if (stage != ShaderType::Fragment) { - return; - } - const auto& used_registers = ir.GetRegisters(); - const auto SafeGetRegister = [&](u32 reg) -> Expression { - // TODO(Rodrigo): Replace with contains once C++20 releases - if (used_registers.find(reg) != used_registers.end()) { - return {GetRegister(reg), Type::Float}; - } - return {"0.0f", Type::Float}; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. - u32 current_reg = 0; - for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), - SafeGetRegister(current_reg).AsFloat()); - ++current_reg; - } - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and current_reg - // already contains one past the last color register. - code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); - } - } - - Expression Exit(Operation operation) { - PreExit(); - code.AddLine("return;"); - return {}; - } - - Expression Discard(Operation operation) { - // Enclose "discard" in a conditional, so that GLSL compilation does not complain - // about unexecuted instructions that may follow this. - code.AddLine("if (true) {{"); - ++code.scope; - code.AddLine("discard;"); - --code.scope; - code.AddLine("}}"); - return {}; - } - - Expression EmitVertex(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EmitVertex is expected to be used in a geometry shader."); - code.AddLine("EmitVertex();"); - return {}; - } - - Expression EndPrimitive(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EndPrimitive is expected to be used in a geometry shader."); - code.AddLine("EndPrimitive();"); - return {}; - } - - Expression InvocationId(Operation operation) { - return {"gl_InvocationID", Type::Int}; - } - - Expression YNegate(Operation operation) { - // Y_NEGATE is mapped to this uniform value - return {"gl_FrontMaterial.ambient.a", Type::Float}; - } - - template - Expression LocalInvocationId(Operation) { - return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; - } - - template - Expression WorkGroupId(Operation) { - return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub on non-Nvidia devices by simulating all threads voting the same as the active - // one. - return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; - } - return {fmt::format("ballotThreadNV({})", value), Type::Uint}; - } - - Expression Vote(Operation operation, const char* func) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub with a warp size of one. - return {value, Type::Bool}; - } - return {fmt::format("{}({})", func, value), Type::Bool}; - } - - Expression VoteAll(Operation operation) { - return Vote(operation, "allThreadsNV"); - } - - Expression VoteAny(Operation operation) { - return Vote(operation, "anyThreadNV"); - } - - Expression VoteEqual(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // We must return true here since a stub for a theoretical warp size of 1. - // This will always return an equal result across all votes. - return {"true", Type::Bool}; - } - return Vote(operation, "allThreadsEqualNV"); - } - - Expression ThreadId(Operation operation) { - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {"0U", Type::Uint}; - } - return {"gl_SubGroupInvocationARB", Type::Uint}; - } - - template - Expression ThreadMask(Operation) { - if (device.HasWarpIntrinsics()) { - return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; - } - if (device.HasShaderBallot()) { - return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; - } - LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); - return {"0U", Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - std::string value = VisitOperand(operation, 0).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {std::move(value), Type::Float}; - } - - const std::string index = VisitOperand(operation, 1).AsUint(); - return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); - return {}; - } - code.AddLine("barrier();"); - return {}; - } - - Expression MemoryBarrierGroup(Operation) { - code.AddLine("groupMemoryBarrier();"); - return {}; - } - - Expression MemoryBarrierGlobal(Operation) { - code.AddLine("memoryBarrier();"); - return {}; - } - - struct Func final { - Func() = delete; - ~Func() = delete; - - static constexpr std::string_view LessThan = "<"; - static constexpr std::string_view Equal = "=="; - static constexpr std::string_view LessEqual = "<="; - static constexpr std::string_view GreaterThan = ">"; - static constexpr std::string_view NotEqual = "!="; - static constexpr std::string_view GreaterEqual = ">="; - - static constexpr std::string_view Eq = "Eq"; - static constexpr std::string_view Ge = "Ge"; - static constexpr std::string_view Gt = "Gt"; - static constexpr std::string_view Le = "Le"; - static constexpr std::string_view Lt = "Lt"; - - static constexpr std::string_view Add = "Add"; - static constexpr std::string_view Min = "Min"; - static constexpr std::string_view Max = "Max"; - static constexpr std::string_view And = "And"; - static constexpr std::string_view Or = "Or"; - static constexpr std::string_view Xor = "Xor"; - static constexpr std::string_view Exchange = "Exchange"; - }; - - static constexpr std::array operation_decompilers = { - &GLSLDecompiler::Assign, - - &GLSLDecompiler::Select, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Fma, - &GLSLDecompiler::Negate, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::FClamp, - &GLSLDecompiler::FCastHalf0, - &GLSLDecompiler::FCastHalf1, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - &GLSLDecompiler::FCos, - &GLSLDecompiler::FSin, - &GLSLDecompiler::FExp2, - &GLSLDecompiler::FLog2, - &GLSLDecompiler::FInverseSqrt, - &GLSLDecompiler::FSqrt, - &GLSLDecompiler::FRoundEven, - &GLSLDecompiler::FFloor, - &GLSLDecompiler::FCeil, - &GLSLDecompiler::FTrunc, - &GLSLDecompiler::FCastInteger, - &GLSLDecompiler::FCastInteger, - &GLSLDecompiler::FSwizzleAdd, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Negate, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - - &GLSLDecompiler::ICastFloat, - &GLSLDecompiler::ICastUnsigned, - &GLSLDecompiler::LogicalShiftLeft, - &GLSLDecompiler::ILogicalShiftRight, - &GLSLDecompiler::IArithmeticShiftRight, - &GLSLDecompiler::BitwiseAnd, - &GLSLDecompiler::BitwiseOr, - &GLSLDecompiler::BitwiseXor, - &GLSLDecompiler::BitwiseNot, - &GLSLDecompiler::BitfieldInsert, - &GLSLDecompiler::BitfieldExtract, - &GLSLDecompiler::BitCount, - &GLSLDecompiler::BitMSB, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - &GLSLDecompiler::UCastFloat, - &GLSLDecompiler::UCastSigned, - &GLSLDecompiler::LogicalShiftLeft, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::BitwiseAnd, - &GLSLDecompiler::BitwiseOr, - &GLSLDecompiler::BitwiseXor, - &GLSLDecompiler::BitwiseNot, - &GLSLDecompiler::BitfieldInsert, - &GLSLDecompiler::BitfieldExtract, - &GLSLDecompiler::BitCount, - &GLSLDecompiler::BitMSB, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Fma, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::HNegate, - &GLSLDecompiler::HClamp, - &GLSLDecompiler::HCastFloat, - &GLSLDecompiler::HUnpack, - &GLSLDecompiler::HMergeF32, - &GLSLDecompiler::HMergeH0, - &GLSLDecompiler::HMergeH1, - &GLSLDecompiler::HPack2, - - &GLSLDecompiler::LogicalAssign, - &GLSLDecompiler::LogicalAnd, - &GLSLDecompiler::LogicalOr, - &GLSLDecompiler::LogicalXor, - &GLSLDecompiler::LogicalNegate, - &GLSLDecompiler::LogicalPick2, - &GLSLDecompiler::LogicalAnd2, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::FOrdered, - &GLSLDecompiler::FUnordered, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::LogicalAddCarry, - - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, - - &GLSLDecompiler::Texture, - &GLSLDecompiler::TextureLod, - &GLSLDecompiler::TextureGather, - &GLSLDecompiler::TextureQueryDimensions, - &GLSLDecompiler::TextureQueryLod, - &GLSLDecompiler::TexelFetch, - &GLSLDecompiler::TextureGradient, - - &GLSLDecompiler::ImageLoad, - &GLSLDecompiler::ImageStore, - - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - - &GLSLDecompiler::Branch, - &GLSLDecompiler::BranchIndirect, - &GLSLDecompiler::PushFlowStack, - &GLSLDecompiler::PopFlowStack, - &GLSLDecompiler::Exit, - &GLSLDecompiler::Discard, - - &GLSLDecompiler::EmitVertex, - &GLSLDecompiler::EndPrimitive, - - &GLSLDecompiler::InvocationId, - &GLSLDecompiler::YNegate, - &GLSLDecompiler::LocalInvocationId<0>, - &GLSLDecompiler::LocalInvocationId<1>, - &GLSLDecompiler::LocalInvocationId<2>, - &GLSLDecompiler::WorkGroupId<0>, - &GLSLDecompiler::WorkGroupId<1>, - &GLSLDecompiler::WorkGroupId<2>, - - &GLSLDecompiler::BallotThread, - &GLSLDecompiler::VoteAll, - &GLSLDecompiler::VoteAny, - &GLSLDecompiler::VoteEqual, - - &GLSLDecompiler::ThreadId, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ShuffleIndexed, - - &GLSLDecompiler::Barrier, - &GLSLDecompiler::MemoryBarrierGroup, - &GLSLDecompiler::MemoryBarrierGlobal, - }; - static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); - - std::string GetRegister(u32 index) const { - return AppendSuffix(index, "gpr"); - } - - std::string GetCustomVariable(u32 index) const { - return AppendSuffix(index, "custom_var"); - } - - std::string GetPredicate(Tegra::Shader::Pred pred) const { - return AppendSuffix(static_cast(pred), "pred"); - } - - std::string GetGenericInputAttribute(Attribute::Index attribute) const { - return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); - } - - std::unordered_map varying_description; - - std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { - const u8 offset = static_cast(GetGenericAttributeIndex(attribute) * 4 + element); - const auto& description = varying_description.at(offset); - if (description.is_scalar) { - return description.name; - } - return fmt::format("{}[{}]", description.name, element - description.first_element); - } - - std::string GetConstBuffer(u32 index) const { - return AppendSuffix(index, "cbuf"); - } - - std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); - } - - std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, - suffix); - } - - std::string GetConstBufferBlock(u32 index) const { - return AppendSuffix(index, "cbuf_block"); - } - - std::string GetLocalMemory() const { - if (suffix.empty()) { - return "lmem"; - } else { - return "lmem_" + std::string{suffix}; - } - } - - std::string GetInternalFlag(InternalFlag flag) const { - constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", - "overflow_flag"}; - const auto index = static_cast(flag); - ASSERT(index < static_cast(InternalFlag::Amount)); - - if (suffix.empty()) { - return InternalFlagNames[index]; - } else { - return fmt::format("{}_{}", InternalFlagNames[index], suffix); - } - } - - std::string GetSampler(const SamplerEntry& sampler) const { - return AppendSuffix(sampler.index, "sampler"); - } - - std::string GetImage(const ImageEntry& image) const { - return AppendSuffix(image.index, "image"); - } - - std::string AppendSuffix(u32 index, std::string_view name) const { - if (suffix.empty()) { - return fmt::format("{}{}", name, index); - } else { - return fmt::format("{}{}_{}", name, index, suffix); - } - } - - u32 GetNumPhysicalInputAttributes() const { - return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); - } - - u32 GetNumPhysicalAttributes() const { - return std::min(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); - } - - u32 GetNumPhysicalVaryings() const { - return std::min(device.GetMaxVaryings(), Maxwell::NumVaryings); - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - const std::string_view identifier; - const std::string_view suffix; - const Header header; - std::unordered_map transform_feedback; - - ShaderWriter code; - - std::optional max_input_vertices; -}; - -std::string GetFlowVariable(u32 index) { - return fmt::format("flow_var{}", index); -} - -class ExprDecompiler { -public: - explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ExprAnd& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += '!'; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - const auto pred = static_cast(expr.predicate); - inner += decomp.GetPredicate(pred); - } - - void operator()(const ExprCondCode& expr) { - inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); - } - - void operator()(const ExprVar& expr) { - inner += GetFlowVariable(expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? "true" : "false"; - } - - void operator()(VideoCommon::Shader::ExprGprEqual& expr) { - inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - GLSLDecompiler& decomp; - std::string inner; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()(const ASTIfElse& ast) { - decomp.code.AddLine("else {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - decomp.code.AddLine("// Label_{}:", ast.index); - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("do {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}} while({});", expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - if (ast.kills) { - decomp.code.AddLine("discard;"); - } else { - decomp.PreExit(); - decomp.code.AddLine("return;"); - } - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void operator()(const ASTBreak& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - decomp.code.AddLine("break;"); - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - GLSLDecompiler& decomp; -}; - -void GLSLDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - code.AddLine("bool {} = false;", GetFlowVariable(i)); - } - - ASTDecompiler decompiler{*this}; - decompiler.Visit(ir.GetASTProgram()); -} - -} // Anonymous namespace - -ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, - usage.is_written); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& image : ir.GetImages()) { - entries.images.emplace_back(image); - } - const auto clip_distances = ir.GetClipDistances(); - for (std::size_t i = 0; i < std::size(clip_distances); ++i) { - entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.shader_length = ir.GetLength(); - return entries; -} - -std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier, - std::string_view suffix) { - GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); - decompiler.Decompile(); - return decompiler.GetResult(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 0397a000c..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace OpenGL { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) - : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} - - u32 GetIndex() const { - return index; - } - -private: - u32 index = 0; -}; - -struct GlobalMemoryEntry { - constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, - bool is_written_) - : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ - is_written_} {} - - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - bool is_read = false; - bool is_written = false; -}; - -struct ShaderEntries { - std::vector const_buffers; - std::vector global_memory_entries; - std::vector samplers; - std::vector images; - std::size_t shader_length{}; - u32 clip_distances{}; - u32 enabled_uniform_buffers{}; -}; - -ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage); - -std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier, - std::string_view suffix = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp deleted file mode 100644 index 0deb86517..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/fs/file.h" -#include "common/fs/fs.h" -#include "common/fs/path_util.h" -#include "common/logging/log.h" -#include "common/scm_rev.h" -#include "common/settings.h" -#include "common/zstd_compression.h" -#include "core/core.h" -#include "core/hle/kernel/k_process.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" - -namespace OpenGL { - -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::BindlessSamplerMap; -using VideoCommon::Shader::BoundSamplerMap; -using VideoCommon::Shader::KeyMap; -using VideoCommon::Shader::SeparateSamplerKey; -using ShaderCacheVersionHash = std::array; - -struct ConstBufferKey { - u32 cbuf = 0; - u32 offset = 0; - u32 value = 0; -}; - -struct BoundSamplerEntry { - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct SeparateSamplerEntry { - u32 cbuf1 = 0; - u32 cbuf2 = 0; - u32 offset1 = 0; - u32 offset2 = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct BindlessSamplerEntry { - u32 cbuf = 0; - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -namespace { - -constexpr u32 NativeVersion = 21; - -ShaderCacheVersionHash GetShaderCacheVersionHash() { - ShaderCacheVersionHash hash{}; - const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); - std::memcpy(hash.data(), Common::g_shader_cache_version, length); - return hash; -} - -} // Anonymous namespace - -ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; - -ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; - -bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { - if (!file.ReadObject(type)) { - return false; - } - u32 code_size; - u32 code_size_b; - if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { - return false; - } - code.resize(code_size); - code_b.resize(code_size_b); - if (file.Read(code) != code_size) { - return false; - } - if (HasProgramA() && file.Read(code_b) != code_size_b) { - return false; - } - - u8 is_texture_handler_size_known; - u32 texture_handler_size_value; - u32 num_keys; - u32 num_bound_samplers; - u32 num_separate_samplers; - u32 num_bindless_samplers; - if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || - !file.ReadObject(is_texture_handler_size_known) || - !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || - !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || - !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || - !file.ReadObject(num_bindless_samplers)) { - return false; - } - if (is_texture_handler_size_known) { - texture_handler_size = texture_handler_size_value; - } - - std::vector flat_keys(num_keys); - std::vector flat_bound_samplers(num_bound_samplers); - std::vector flat_separate_samplers(num_separate_samplers); - std::vector flat_bindless_samplers(num_bindless_samplers); - if (file.Read(flat_keys) != flat_keys.size() || - file.Read(flat_bound_samplers) != flat_bound_samplers.size() || - file.Read(flat_separate_samplers) != flat_separate_samplers.size() || - file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { - return false; - } - for (const auto& entry : flat_keys) { - keys.insert({{entry.cbuf, entry.offset}, entry.value}); - } - for (const auto& entry : flat_bound_samplers) { - bound_samplers.emplace(entry.offset, entry.sampler); - } - for (const auto& entry : flat_separate_samplers) { - SeparateSamplerKey key; - key.buffers = {entry.cbuf1, entry.cbuf2}; - key.offsets = {entry.offset1, entry.offset2}; - separate_samplers.emplace(key, entry.sampler); - } - for (const auto& entry : flat_bindless_samplers) { - bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); - } - - return true; -} - -bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { - if (!file.WriteObject(static_cast(type)) || - !file.WriteObject(static_cast(code.size())) || - !file.WriteObject(static_cast(code_b.size()))) { - return false; - } - if (file.Write(code) != code.size()) { - return false; - } - if (HasProgramA() && file.Write(code_b) != code_b.size()) { - return false; - } - - if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || - !file.WriteObject(static_cast(texture_handler_size.has_value())) || - !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || - !file.WriteObject(compute_info) || !file.WriteObject(static_cast(keys.size())) || - !file.WriteObject(static_cast(bound_samplers.size())) || - !file.WriteObject(static_cast(separate_samplers.size())) || - !file.WriteObject(static_cast(bindless_samplers.size()))) { - return false; - } - - std::vector flat_keys; - flat_keys.reserve(keys.size()); - for (const auto& [address, value] : keys) { - flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); - } - - std::vector flat_bound_samplers; - flat_bound_samplers.reserve(bound_samplers.size()); - for (const auto& [address, sampler] : bound_samplers) { - flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); - } - - std::vector flat_separate_samplers; - flat_separate_samplers.reserve(separate_samplers.size()); - for (const auto& [key, sampler] : separate_samplers) { - SeparateSamplerEntry entry; - std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; - std::tie(entry.offset1, entry.offset2) = key.offsets; - entry.sampler = sampler; - flat_separate_samplers.push_back(entry); - } - - std::vector flat_bindless_samplers; - flat_bindless_samplers.reserve(bindless_samplers.size()); - for (const auto& [address, sampler] : bindless_samplers) { - flat_bindless_samplers.push_back( - BindlessSamplerEntry{address.first, address.second, sampler}); - } - - return file.Write(flat_keys) == flat_keys.size() && - file.Write(flat_bound_samplers) == flat_bound_samplers.size() && - file.Write(flat_separate_samplers) == flat_separate_samplers.size() && - file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); -} - -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; - -ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; - -void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { - title_id = title_id_; -} - -std::optional> ShaderDiskCacheOpenGL::LoadTransferable() { - // Skip games without title id - const bool has_title_id = title_id != 0; - if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { - return std::nullopt; - } - - Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No transferable shader cache found"); - is_usable = true; - return std::nullopt; - } - - u32 version{}; - if (!file.ReadObject(version)) { - LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); - return std::nullopt; - } - - if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); - file.Close(); - InvalidateTransferable(); - is_usable = true; - return std::nullopt; - } - if (version > NativeVersion) { - LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator, skipping"); - return std::nullopt; - } - - // Version is valid, load the shaders - std::vector entries; - while (static_cast(file.Tell()) < file.GetSize()) { - ShaderDiskCacheEntry& entry = entries.emplace_back(); - if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); - return std::nullopt; - } - } - - is_usable = true; - return {std::move(entries)}; -} - -std::vector ShaderDiskCacheOpenGL::LoadPrecompiled() { - if (!is_usable) { - return {}; - } - - Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); - return {}; - } - - if (const auto result = LoadPrecompiledFile(file)) { - return *result; - } - - LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); - file.Close(); - InvalidatePrecompiled(); - return {}; -} - -std::optional> ShaderDiskCacheOpenGL::LoadPrecompiledFile( - Common::FS::IOFile& file) { - // Read compressed file from disk and decompress to virtual precompiled cache file - std::vector compressed(file.GetSize()); - if (file.Read(compressed) != file.GetSize()) { - return std::nullopt; - } - const std::vector decompressed = Common::Compression::DecompressDataZSTD(compressed); - SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); - precompiled_cache_virtual_file_offset = 0; - - ShaderCacheVersionHash file_hash{}; - if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - if (GetShaderCacheVersionHash() != file_hash) { - LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - - std::vector entries; - while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - u32 binary_size; - auto& entry = entries.emplace_back(); - if (!LoadObjectFromPrecompiled(entry.unique_identifier) || - !LoadObjectFromPrecompiled(entry.binary_format) || - !LoadObjectFromPrecompiled(binary_size)) { - return std::nullopt; - } - - entry.binary.resize(binary_size); - if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { - return std::nullopt; - } - } - return entries; -} - -void ShaderDiskCacheOpenGL::InvalidateTransferable() { - if (!Common::FS::RemoveFile(GetTransferablePath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", - Common::FS::PathToUTF8String(GetTransferablePath())); - } - InvalidatePrecompiled(); -} - -void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { - // Clear virtaul precompiled cache file - precompiled_cache_virtual_file.Resize(0); - - if (!Common::FS::RemoveFile(GetPrecompiledPath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", - Common::FS::PathToUTF8String(GetPrecompiledPath())); - } -} - -void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { - if (!is_usable) { - return; - } - - const u64 id = entry.unique_identifier; - if (stored_transferable.contains(id)) { - // The shader already exists - return; - } - - Common::FS::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) { - return; - } - if (!entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); - file.Close(); - InvalidateTransferable(); - return; - } - - stored_transferable.insert(id); -} - -void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { - if (!is_usable) { - return; - } - - // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header - // when writing the dump. This should be done the moment I get access to write to the virtual - // file. - if (precompiled_cache_virtual_file.GetSize() == 0) { - SavePrecompiledHeaderToVirtualPrecompiledCache(); - } - - GLint binary_length; - glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); - - GLenum binary_format; - std::vector binary(binary_length); - glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - - if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || - !SaveObjectToPrecompiled(static_cast(binary.size())) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", - unique_identifier); - InvalidatePrecompiled(); - } -} - -Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) { - return {}; - } - - const auto transferable_path{GetTransferablePath()}; - const bool existed = Common::FS::Exists(transferable_path); - - Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - if (!existed || file.GetSize() == 0) { - // If the file didn't exist, write its version - if (!file.WriteObject(NativeVersion)) { - LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - } - return file; -} - -void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { - const auto hash{GetShaderCacheVersionHash()}; - if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { - LOG_ERROR( - Render_OpenGL, - "Failed to write precompiled cache version hash to virtual precompiled cache file"); - } -} - -void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { - precompiled_cache_virtual_file_offset = 0; - const std::vector uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector compressed = - Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); - - const auto precompiled_path = GetPrecompiledPath(); - Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, - Common::FS::FileType::BinaryFile}; - - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - return; - } - if (file.Write(compressed) != compressed.size()) { - LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - } -} - -bool ShaderDiskCacheOpenGL::EnsureDirectories() const { - const auto CreateDir = [](const std::filesystem::path& dir) { - if (!Common::FS::CreateDir(dir)) { - LOG_ERROR(Render_OpenGL, "Failed to create directory={}", - Common::FS::PathToUTF8String(dir)); - return false; - } - return true; - }; - - return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && - CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && - CreateDir(GetPrecompiledDir()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { - return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { - return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { - return GetBaseDir() / "transferable"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { - return GetBaseDir() / "precompiled"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { - return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; -} - -std::string ShaderDiskCacheOpenGL::GetTitleID() const { - return fmt::format("{:016X}", title_id); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h deleted file mode 100644 index f8bc23868..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "core/file_sys/vfs_vector.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace Common::FS { -class IOFile; -} - -namespace OpenGL { - -using ProgramCode = std::vector; - -/// Describes a shader and how it's used by the guest GPU -struct ShaderDiskCacheEntry { - ShaderDiskCacheEntry(); - ~ShaderDiskCacheEntry(); - - bool Load(Common::FS::IOFile& file); - - bool Save(Common::FS::IOFile& file) const; - - bool HasProgramA() const { - return !code.empty() && !code_b.empty(); - } - - Tegra::Engines::ShaderType type{}; - ProgramCode code; - ProgramCode code_b; - - u64 unique_identifier = 0; - std::optional texture_handler_size; - u32 bound_buffer = 0; - VideoCommon::Shader::GraphicsInfo graphics_info; - VideoCommon::Shader::ComputeInfo compute_info; - VideoCommon::Shader::KeyMap keys; - VideoCommon::Shader::BoundSamplerMap bound_samplers; - VideoCommon::Shader::SeparateSamplerMap separate_samplers; - VideoCommon::Shader::BindlessSamplerMap bindless_samplers; -}; - -/// Contains an OpenGL dumped binary program -struct ShaderDiskCachePrecompiled { - u64 unique_identifier = 0; - GLenum binary_format = 0; - std::vector binary; -}; - -class ShaderDiskCacheOpenGL { -public: - explicit ShaderDiskCacheOpenGL(); - ~ShaderDiskCacheOpenGL(); - - /// Binds a title ID for all future operations. - void BindTitleID(u64 title_id); - - /// Loads transferable cache. If file has a old version or on failure, it deletes the file. - std::optional> LoadTransferable(); - - /// Loads current game's precompiled cache. Invalidates on failure. - std::vector LoadPrecompiled(); - - /// Removes the transferable (and precompiled) cache file. - void InvalidateTransferable(); - - /// Removes the precompiled cache file and clears virtual precompiled cache file. - void InvalidatePrecompiled(); - - /// Saves a raw dump to the transferable file. Checks for collisions. - void SaveEntry(const ShaderDiskCacheEntry& entry); - - /// Saves a dump entry to the precompiled file. Does not check for collisions. - void SavePrecompiled(u64 unique_identifier, GLuint program); - - /// Serializes virtual precompiled shader cache file to real file - void SaveVirtualPrecompiledFile(); - -private: - /// Loads the transferable cache. Returns empty on failure. - std::optional> LoadPrecompiledFile( - Common::FS::IOFile& file); - - /// Opens current game's transferable file and write it's header if it doesn't exist - Common::FS::IOFile AppendTransferableFile() const; - - /// Save precompiled header to precompiled_cache_in_memory - void SavePrecompiledHeaderToVirtualPrecompiledCache(); - - /// Create shader disk cache directories. Returns true on success. - bool EnsureDirectories() const; - - /// Gets current game's transferable file path - std::filesystem::path GetTransferablePath() const; - - /// Gets current game's precompiled file path - std::filesystem::path GetPrecompiledPath() const; - - /// Get user's transferable directory path - std::filesystem::path GetTransferableDir() const; - - /// Get user's precompiled directory path - std::filesystem::path GetPrecompiledDir() const; - - /// Get user's shader directory path - std::filesystem::path GetBaseDir() const; - - /// Get current game's title id - std::string GetTitleID() const; - - template - bool SaveArrayToPrecompiled(const T* data, std::size_t length) { - const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += write_length; - return write_length == sizeof(T) * length; - } - - template - bool LoadArrayFromPrecompiled(T* data, std::size_t length) { - const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += read_length; - return read_length == sizeof(T) * length; - } - - template - bool SaveObjectToPrecompiled(const T& object) { - return SaveArrayToPrecompiled(&object, 1); - } - - bool SaveObjectToPrecompiled(bool object) { - const auto value = static_cast(object); - return SaveArrayToPrecompiled(&value, 1); - } - - template - bool LoadObjectFromPrecompiled(T& object) { - return LoadArrayFromPrecompiled(&object, 1); - } - - // Stores whole precompiled cache which will be read from or saved to the precompiled chache - // file - FileSys::VectorVfsFile precompiled_cache_virtual_file; - // Stores the current offset of the precompiled cache file for IO purposes - std::size_t precompiled_cache_virtual_file_offset = 0; - - // Stored transferable shaders - std::unordered_set stored_transferable; - - /// Title ID to operate on - u64 title_id = 0; - - // The cache has been loaded at boot - bool is_usable = false; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index b7f5b8bc2..6c0d5c7f4 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -323,7 +323,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi cmdbuf.SetScissor(0, scissor); cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); } - } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3a48219b7..7a3660496 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -8,146 +8,14 @@ #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_) - : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, - descriptor_set_layout{CreateDescriptorSetLayout()}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate()}, - shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} +ComputePipeline::ComputePipeline() = default; -VKComputePipeline::~VKComputePipeline() = default; - -VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { - std::vector bindings; - u32 binding = 0; - const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { - // TODO(Rodrigo): Maybe make individual bindings here? - for (u32 bindpoint = 0; bindpoint < static_cast(num_entries); ++bindpoint) { - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - } - }; - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); - add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - return device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }); -} - -vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { - std::vector template_entries; - u32 binding = 0; - u32 offset = 0; - FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. - return {}; - } - - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }); -} - -vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector& code) const { - device.SaveShader(code); - - return device.GetLogical().CreateShaderModule({ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = code.size() * sizeof(u32), - .pCode = code.data(), - }); -} - -vk::Pipeline VKComputePipeline::CreatePipeline() const { - - VkComputePipelineCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *shader_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *layout, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - - if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - ci.stage.pNext = &subgroup_size_ci; - } - - return device.GetLogical().CreateComputePipeline(ci); -} +ComputePipeline::~ComputePipeline() = default; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 7e16575ac..433d8bb3d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -6,7 +6,6 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -15,50 +14,10 @@ class Device; class VKScheduler; class VKUpdateDescriptorQueue; -class VKComputePipeline final { +class ComputePipeline { public: - explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const SPIRVShader& shader_); - ~VKComputePipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; - } - - VkPipelineLayout GetLayout() const { - return *layout; - } - - const ShaderEntries& GetEntries() const { - return entries; - } - -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout() const; - - vk::PipelineLayout CreatePipelineLayout() const; - - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; - - vk::ShaderModule CreateShaderModule(const std::vector& code) const; - - vk::Pipeline CreatePipeline() const; - - const Device& device; - VKScheduler& scheduler; - ShaderEntries entries; - - vk::DescriptorSetLayout descriptor_set_layout; - DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - vk::ShaderModule shader_module; - vk::Pipeline pipeline; + explicit ComputePipeline(); + ~ComputePipeline(); }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp deleted file mode 100644 index fc6dd83eb..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ /dev/null @@ -1,484 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include - -#include "common/common_types.h" -#include "common/microprofile.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/maxwell_to_vk.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -MICROPROFILE_DECLARE(Vulkan_PipelineCache); - -namespace { - -template -VkStencilOpState GetStencilFaceState(const StencilFace& face) { - return { - .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), - .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), - .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), - .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), - .compareMask = 0, - .writeMask = 0, - .reference = 0, - }; -} - -bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { - static constexpr std::array unsupported_topologies = { - VK_PRIMITIVE_TOPOLOGY_POINT_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, - VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, - VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; - return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), - topology) == std::end(unsupported_topologies); -} - -VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { - union Swizzle { - u32 raw; - BitField<0, 3, Maxwell::ViewportSwizzle> x; - BitField<4, 3, Maxwell::ViewportSwizzle> y; - BitField<8, 3, Maxwell::ViewportSwizzle> z; - BitField<12, 3, Maxwell::ViewportSwizzle> w; - }; - const Swizzle unpacked{swizzle}; - - return { - .x = MaxwellToVK::ViewportSwizzle(unpacked.x), - .y = MaxwellToVK::ViewportSwizzle(unpacked.y), - .z = MaxwellToVK::ViewportSwizzle(unpacked.z), - .w = MaxwellToVK::ViewportSwizzle(unpacked.w), - }; -} - -VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { - switch (msaa_mode) { - case Tegra::Texture::MsaaMode::Msaa1x1: - return VK_SAMPLE_COUNT_1_BIT; - case Tegra::Texture::MsaaMode::Msaa2x1: - case Tegra::Texture::MsaaMode::Msaa2x1_D3D: - return VK_SAMPLE_COUNT_2_BIT; - case Tegra::Texture::MsaaMode::Msaa2x2: - case Tegra::Texture::MsaaMode::Msaa2x2_VC4: - case Tegra::Texture::MsaaMode::Msaa2x2_VC12: - return VK_SAMPLE_COUNT_4_BIT; - case Tegra::Texture::MsaaMode::Msaa4x2: - case Tegra::Texture::MsaaMode::Msaa4x2_D3D: - case Tegra::Texture::MsaaMode::Msaa4x2_VC8: - case Tegra::Texture::MsaaMode::Msaa4x2_VC24: - return VK_SAMPLE_COUNT_8_BIT; - case Tegra::Texture::MsaaMode::Msaa4x4: - return VK_SAMPLE_COUNT_16_BIT; - default: - UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); - return VK_SAMPLE_COUNT_1_BIT; - } -} - -} // Anonymous namespace - -VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span bindings, - const SPIRVProgram& program, u32 num_color_buffers) - : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, - descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, - descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, - update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, - descriptor_template{CreateDescriptorUpdateTemplate(program)}, - modules(CreateShaderModules(program)), - pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} - -VKGraphicsPipeline::~VKGraphicsPipeline() = default; - -VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { - if (!descriptor_template) { - return {}; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(*descriptor_template, set); - return set; -} - -vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( - vk::Span bindings) const { - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = bindings.size(), - .pBindings = bindings.data(), - }; - return device.GetLogical().CreateDescriptorSetLayout(ci); -} - -vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { - const VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - return device.GetLogical().CreatePipelineLayout(ci); -} - -vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const { - std::vector template_entries; - u32 binding = 0; - u32 offset = 0; - for (const auto& stage : program) { - if (stage) { - FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); - } - } - if (template_entries.empty()) { - // If the shader doesn't use descriptor sets, skip template creation. - return {}; - } - - const VkDescriptorUpdateTemplateCreateInfoKHR ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(template_entries.size()), - .pDescriptorUpdateEntries = template_entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *layout, - .set = DESCRIPTOR_SET, - }; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); -} - -std::vector VKGraphicsPipeline::CreateShaderModules( - const SPIRVProgram& program) const { - VkShaderModuleCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .codeSize = 0, - .pCode = nullptr, - }; - - std::vector shader_modules; - shader_modules.reserve(Maxwell::MaxShaderStage); - for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { - const auto& stage = program[i]; - if (!stage) { - continue; - } - - device.SaveShader(stage->code); - - ci.codeSize = stage->code.size() * sizeof(u32); - ci.pCode = stage->code.data(); - shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); - } - return shader_modules; -} - -vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, - VkRenderPass renderpass, - u32 num_color_buffers) const { - const auto& state = cache_key.fixed_state; - const auto& viewport_swizzles = state.viewport_swizzles; - - FixedPipelineState::DynamicState dynamic; - if (device.IsExtExtendedDynamicStateSupported()) { - // Insert dummy values, as long as they are valid they don't matter as extended dynamic - // state is ignored - dynamic.raw1 = 0; - dynamic.raw2 = 0; - dynamic.vertex_strides.fill(0); - } else { - dynamic = state.dynamic_state; - } - - std::vector vertex_bindings; - std::vector vertex_binding_divisors; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; - const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ - .binding = static_cast(index), - .divisor = state.binding_divisors[index], - }); - } - } - - std::vector vertex_attributes; - const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; - if (!attribute.enabled) { - continue; - } - if (!input_attributes.contains(static_cast(index))) { - // Skip attributes not used by the vertex shaders. - continue; - } - vertex_attributes.push_back({ - .location = static_cast(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); - } - - VkPipelineVertexInputStateCreateInfo vertex_input_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), - .pVertexBindingDescriptions = vertex_bindings.data(), - .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), - .pVertexAttributeDescriptions = vertex_attributes.data(), - }; - - const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .vertexBindingDivisorCount = static_cast(vertex_binding_divisors.size()), - .pVertexBindingDivisors = vertex_binding_divisors.data(), - }; - if (!vertex_binding_divisors.empty()) { - vertex_input_ci.pNext = &input_divisor_ci; - } - - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); - const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), - .primitiveRestartEnable = state.primitive_restart_enable != 0 && - SupportsPrimitiveRestart(input_assembly_topology), - }; - - const VkPipelineTessellationStateCreateInfo tessellation_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, - }; - - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, - }; - - std::array swizzles; - std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewportSwizzles = swizzles.data(), - }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } - - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthClampEnable = - static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), - .rasterizerDiscardEnable = - static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = static_cast( - dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), - .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }; - - const VkPipelineMultisampleStateCreateInfo multisample_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), - .sampleShadingEnable = VK_FALSE, - .minSampleShading = 0.0f, - .pSampleMask = nullptr, - .alphaToCoverageEnable = VK_FALSE, - .alphaToOneEnable = VK_FALSE, - }; - - const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthTestEnable = dynamic.depth_test_enable, - .depthWriteEnable = dynamic.depth_write_enable, - .depthCompareOp = dynamic.depth_test_enable - ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) - : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, - .stencilTestEnable = dynamic.stencil_enable, - .front = GetStencilFaceState(dynamic.front), - .back = GetStencilFaceState(dynamic.back), - .minDepthBounds = 0.0f, - .maxDepthBounds = 0.0f, - }; - - std::array cb_attachments; - for (std::size_t index = 0; index < num_color_buffers; ++index) { - static constexpr std::array COMPONENT_TABLE{ - VK_COLOR_COMPONENT_R_BIT, - VK_COLOR_COMPONENT_G_BIT, - VK_COLOR_COMPONENT_B_BIT, - VK_COLOR_COMPONENT_A_BIT, - }; - const auto& blend = state.attachments[index]; - - VkColorComponentFlags color_components = 0; - for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { - if (blend.Mask()[i]) { - color_components |= COMPONENT_TABLE[i]; - } - } - - cb_attachments[index] = { - .blendEnable = blend.enable != 0, - .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), - .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), - .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), - .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), - .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), - .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), - .colorWriteMask = color_components, - }; - } - - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = num_color_buffers, - .pAttachments = cb_attachments.data(), - .blendConstants = {}, - }; - - std::vector dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, - VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, - VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, - VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, - }; - if (device.IsExtExtendedDynamicStateSupported()) { - static constexpr std::array extended{ - VK_DYNAMIC_STATE_CULL_MODE_EXT, - VK_DYNAMIC_STATE_FRONT_FACE_EXT, - VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, - VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, - VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, - VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, - VK_DYNAMIC_STATE_STENCIL_OP_EXT, - }; - dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); - } - - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .dynamicStateCount = static_cast(dynamic_states.size()), - .pDynamicStates = dynamic_states.data(), - }; - - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - - std::vector shader_stages; - std::size_t module_index = 0; - for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - if (!program[stage]) { - continue; - } - - VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = MaxwellToVK::ShaderStage(static_cast(stage)); - stage_ci.module = *modules[module_index++]; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { - stage_ci.pNext = &subgroup_size_ci; - } - } - return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast(shader_stages.size()), - .pStages = shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = &tessellation_ci, - .pViewportState = &viewport_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisample_ci, - .pDepthStencilState = &depth_stencil_ci, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *layout, - .renderPass = renderpass, - .subpass = 0, - .basePipelineHandle = nullptr, - .basePipelineIndex = 0, - }); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h deleted file mode 100644 index 8b6a98fe0..000000000 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsPipelineCacheKey { - VkRenderPass renderpass; - std::array shaders; - FixedPipelineState fixed_state; - - std::size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - std::size_t Size() const noexcept { - return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -class Device; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; - -using SPIRVProgram = std::array, Maxwell::MaxShaderStage>; - -class VKGraphicsPipeline final { -public: - explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue_, - const GraphicsPipelineCacheKey& key, - vk::Span bindings, - const SPIRVProgram& program, u32 num_color_buffers); - ~VKGraphicsPipeline(); - - VkDescriptorSet CommitDescriptorSet(); - - VkPipeline GetHandle() const { - return *pipeline; - } - - VkPipelineLayout GetLayout() const { - return *layout; - } - - GraphicsPipelineCacheKey GetCacheKey() const { - return cache_key; - } - -private: - vk::DescriptorSetLayout CreateDescriptorSetLayout( - vk::Span bindings) const; - - vk::PipelineLayout CreatePipelineLayout() const; - - vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const SPIRVProgram& program) const; - - std::vector CreateShaderModules(const SPIRVProgram& program) const; - - vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, - u32 num_color_buffers) const; - - const Device& device; - VKScheduler& scheduler; - const GraphicsPipelineCacheKey cache_key; - const u64 hash; - - vk::DescriptorSetLayout descriptor_set_layout; - DescriptorAllocator descriptor_allocator; - VKUpdateDescriptorQueue& update_descriptor_queue; - vk::PipelineLayout layout; - vk::DescriptorUpdateTemplateKHR descriptor_template; - std::vector modules; - - vk::Pipeline pipeline; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8991505ca..7d0ba1180 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -19,49 +19,27 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { - MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::STAGE_MAIN_OFFSET; namespace { - -constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; -constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; -constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - -constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ - .depth = VideoCommon::Shader::CompileDepth::FullDecompile, - .disable_else_derivation = true, -}; - -constexpr std::size_t GetStageFromProgram(std::size_t program) { +size_t StageFromProgram(size_t program) { return program == 0 ? 0 : program - 1; } -constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { - return static_cast(GetStageFromProgram(static_cast(program))); +ShaderType StageFromProgram(Maxwell::ShaderProgram program) { + return static_cast(StageFromProgram(static_cast(program))); } ShaderType GetShaderType(Maxwell::ShaderProgram program) { @@ -81,165 +59,35 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { return ShaderType::Vertex; } } - -template -void AddBindings(std::vector& bindings, u32& binding, - VkShaderStageFlags stage_flags, const Container& container) { - const u32 num_entries = static_cast(std::size(container)); - for (std::size_t i = 0; i < num_entries; ++i) { - u32 count = 1; - if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - // Combined image samplers can be arrayed. - count = container[i].size; - } - bindings.push_back({ - .binding = binding++, - .descriptorType = descriptor_type, - .descriptorCount = count, - .stageFlags = stage_flags, - .pImmutableSamplers = nullptr, - }); - } -} - -u32 FillDescriptorLayout(const ShaderEntries& entries, - std::vector& bindings, - Maxwell::ShaderProgram program_type, u32 base_binding) { - const ShaderType stage = GetStageFromProgram(program_type); - const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); - - u32 binding = base_binding; - AddBindings(bindings, binding, flags, entries.const_buffers); - AddBindings(bindings, binding, flags, entries.global_buffers); - AddBindings(bindings, binding, flags, entries.uniform_texels); - AddBindings(bindings, binding, flags, entries.samplers); - AddBindings(bindings, binding, flags, entries.storage_texels); - AddBindings(bindings, binding, flags, entries.images); - return binding; -} - } // Anonymous namespace -std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { - const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); - return static_cast(hash); -} - -bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { - return std::memcmp(&rhs, this, Size()) == 0; -} - -std::size_t ComputePipelineCacheKey::Hash() const noexcept { +size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); - return static_cast(hash); + return static_cast(hash); } bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, - GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) - : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), - shader_ir(program_code, main_offset_, compiler_settings, registry), - entries(GenerateShaderEntries(shader_ir)) {} +Shader::Shader() = default; Shader::~Shader() = default; -VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_, + VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_) : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} -VKPipelineCache::~VKPipelineCache() = default; - -std::array VKPipelineCache::GetShaders() { - std::array shaders{}; - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto program{static_cast(index)}; - - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - - const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - if (!result) { - const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; - - // No shader found - create a new one - static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; - const auto stage = static_cast(index == 0 ? 0 : index - 1); - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader = std::make_unique(maxwell3d, stage, gpu_addr, *cpu_addr, - std::move(code), stage_offset); - result = shader.get(); - - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, size_in_bytes); - } else { - null_shader = std::move(shader); - } - } - shaders[index] = result; - } - return last_shaders = shaders; -} - -VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( - const GraphicsPipelineCacheKey& key, u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders) { - MICROPROFILE_SCOPE(Vulkan_PipelineCache); - - if (last_graphics_pipeline && last_graphics_key == key) { - return last_graphics_pipeline; - } - last_graphics_key = key; - - if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { - std::unique_lock lock{pipeline_cache}; - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, - update_descriptor_queue, bindings, program, key, - num_color_buffers); - } - last_graphics_pipeline = pair->second.get(); - return last_graphics_pipeline; - } - - const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); - auto& entry = pair->second; - if (is_cache_miss) { - gpu.ShaderNotify().MarkSharderBuilding(); - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key.fixed_state); - entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, key, bindings, - program, num_color_buffers); - gpu.ShaderNotify().MarkShaderComplete(); - } - last_graphics_pipeline = entry.get(); - return last_graphics_pipeline; -} +PipelineCache::~PipelineCache() = default; -VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); @@ -248,200 +96,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - - const GPUVAddr gpu_addr = key.shader; - - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - - Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); - if (!shader) { - // No shader found - create a new one - const auto host_ptr = gpu_memory.GetPointer(gpu_addr); - - ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); - const std::size_t size_in_bytes = code.size() * sizeof(u64); - - auto shader_info = std::make_unique(kepler_compute, ShaderType::Compute, gpu_addr, - *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); - shader = shader_info.get(); - - if (cpu_addr) { - Register(std::move(shader_info), *cpu_addr, size_in_bytes); - } else { - null_kernel = std::move(shader_info); - } - } - - const Specialization specialization{ - .base_binding = 0, - .workgroup_size = key.workgroup_size, - .shared_memory_size = key.shared_memory_size, - .point_size = std::nullopt, - .enabled_attributes = {}, - .attribute_types = {}, - .ndc_minus_one_to_one = false, - }; - const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, - shader->GetRegistry(), specialization), - shader->GetEntries()}; - entry = std::make_unique(device, scheduler, descriptor_pool, - update_descriptor_queue, spirv_shader); - return *entry; -} - -void VKPipelineCache::EmplacePipeline(std::unique_ptr pipeline) { - gpu.ShaderNotify().MarkShaderComplete(); - std::unique_lock lock{pipeline_cache}; - graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); -} - -void VKPipelineCache::OnShaderRemoval(Shader* shader) { - bool finished = false; - const auto Finish = [&] { - // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and - // flush. - if (finished) { - return; - } - finished = true; - scheduler.Finish(); - }; - - const GPUVAddr invalidated_addr = shader->GetGpuAddr(); - for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { - auto& entry = it->first; - if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == - entry.shaders.end()) { - ++it; - continue; - } - Finish(); - it = graphics_cache.erase(it); - } - for (auto it = compute_cache.begin(); it != compute_cache.end();) { - auto& entry = it->first; - if (entry.shader != invalidated_addr) { - ++it; - continue; - } - Finish(); - it = compute_cache.erase(it); - } -} - -std::pair> -VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { - Specialization specialization; - if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { - float point_size; - std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); - specialization.point_size = point_size; - ASSERT(point_size != 0.0f); - } - for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - const auto& attribute = fixed_state.attributes[i]; - specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; - specialization.attribute_types[i] = attribute.Type(); - } - specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; - specialization.early_fragment_tests = fixed_state.early_z; - - // Alpha test - specialization.alpha_test_func = - FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); - specialization.alpha_test_ref = Common::BitCast(fixed_state.alpha_test_ref); - - SPIRVProgram program; - std::vector bindings; - - for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { - const auto program_enum = static_cast(index); - // Skip stages that are not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - continue; - } - const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); - const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); - - const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 - const ShaderType program_type = GetShaderType(program_enum); - const auto& entries = shader->GetEntries(); - program[stage] = { - Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), - entries, - }; - - const u32 old_binding = specialization.base_binding; - specialization.base_binding = - FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); - ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); - } - return {std::move(program), std::move(bindings)}; + throw "Bad"; } -template -void AddEntry(std::vector& template_entries, u32& binding, - u32& offset, const Container& container) { - static constexpr u32 entry_size = static_cast(sizeof(DescriptorUpdateEntry)); - const u32 count = static_cast(std::size(container)); - - if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { - for (u32 i = 0; i < count; ++i) { - const u32 num_samplers = container[i].size; - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = num_samplers, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - - ++binding; - offset += num_samplers * entry_size; - } - return; - } - - if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || - descriptor_type == STORAGE_TEXEL_BUFFER) { - // Nvidia has a bug where updating multiple texels at once causes the driver to crash. - // Note: Fixed in driver Windows 443.24, Linux 440.66.15 - for (u32 i = 0; i < count; ++i) { - template_entries.push_back({ - .dstBinding = binding + i, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = descriptor_type, - .offset = static_cast(offset + i * entry_size), - .stride = entry_size, - }); - } - } else if (count > 0) { - template_entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = count, - .descriptorType = descriptor_type, - .offset = offset, - .stride = entry_size, - }); - } - offset += count * entry_size; - binding += count; -} - -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector& template_entries) { - AddEntry(template_entries, offset, binding, entries.const_buffers); - AddEntry(template_entries, offset, binding, entries.global_buffers); - AddEntry(template_entries, offset, binding, entries.uniform_texels); - AddEntry(template_entries, offset, binding, entries.samplers); - AddEntry(template_entries, offset, binding, entries.storage_texels); - AddEntry(template_entries, offset, binding, entries.images); -} +void PipelineCache::OnShaderRemoval(Shader*) {} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..e3e63340d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -15,15 +15,8 @@ #include #include "common/common_types.h" -#include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" -#include "video_core/renderer_vulkan/vk_shader_decompiler.h" -#include "video_core/shader/async_shaders.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -35,7 +28,7 @@ namespace Vulkan { class Device; class RasterizerVulkan; -class VKComputePipeline; +class ComputePipeline; class VKDescriptorPool; class VKScheduler; class VKUpdateDescriptorQueue; @@ -47,7 +40,7 @@ struct ComputePipelineCacheKey { u32 shared_memory_size; std::array workgroup_size; - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; @@ -63,16 +56,9 @@ static_assert(std::is_trivially_constructible_v); namespace std { -template <> -struct hash { - std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - template <> struct hash { - std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { + size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { return k.Hash(); } }; @@ -83,66 +69,26 @@ namespace Vulkan { class Shader { public: - explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, - Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); + explicit Shader(); ~Shader(); - - GPUVAddr GetGpuAddr() const { - return gpu_addr; - } - - VideoCommon::Shader::ShaderIR& GetIR() { - return shader_ir; - } - - const VideoCommon::Shader::ShaderIR& GetIR() const { - return shader_ir; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return registry; - } - - const ShaderEntries& GetEntries() const { - return entries; - } - -private: - GPUVAddr gpu_addr{}; - VideoCommon::Shader::ProgramCode program_code; - VideoCommon::Shader::Registry registry; - VideoCommon::Shader::ShaderIR shader_ir; - ShaderEntries entries; }; -class VKPipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache final : public VideoCommon::ShaderCache { public: - explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); - ~VKPipelineCache() override; - - std::array GetShaders(); + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const Device& device, + VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); + ~PipelineCache() override; - VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, - u32 num_color_buffers, - VideoCommon::Shader::AsyncShaders& async_shaders); - - VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); - - void EmplacePipeline(std::unique_ptr pipeline); + ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); protected: void OnShaderRemoval(Shader* shader) final; private: - std::pair> DecompileShaders( - const FixedPipelineState& fixed_state); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -158,17 +104,8 @@ private: std::array last_shaders{}; - GraphicsPipelineCacheKey last_graphics_key; - VKGraphicsPipeline* last_graphics_pipeline = nullptr; - std::mutex pipeline_cache; - std::unordered_map> - graphics_cache; - std::unordered_map> compute_cache; + std::unordered_map> compute_cache; }; -void FillDescriptorUpdateTemplateEntries( - const ShaderEntries& entries, u32& binding, u32& offset, - std::vector& template_entries); - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..f152297d9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -24,7 +24,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -97,15 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -std::array GetShaderAddresses( - const std::array& shaders) { - std::array addresses; - for (size_t i = 0; i < std::size(addresses); ++i) { - addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; - } - return addresses; -} - struct TextureHandle { constexpr TextureHandle(u32 data, bool via_header_index) { const Tegra::Texture::TextureHandle handle{data}; @@ -117,98 +107,6 @@ struct TextureHandle { u32 sampler; }; -template -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - size_t stage, size_t index = 0) { - const auto shader_type = static_cast(stage); - if constexpr (std::is_same_v) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::e2D; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, - ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { - for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const VkSampler sampler = *sampler_ptr++; - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddSampledImage(handle, sampler); - } - } - for ([[maybe_unused]] const auto& entry : entries.storage_texels) { - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - } - for (const auto& entry : entries.images) { - // TODO: Mark as modified - const ImageViewId image_view_id = *image_view_id_ptr++; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); - update_descriptor_queue.AddImage(handle); - } -} - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ @@ -253,71 +151,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra descriptor_pool, update_descriptor_queue), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), - wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { + wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } } RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { - MICROPROFILE_SCOPE(Vulkan_Drawing); - - SCOPE_EXIT({ gpu.TickWork(); }); - FlushWork(); - - query_cache.UpdateCounters(); - - graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - texture_cache.SynchronizeGraphicsDescriptors(); - texture_cache.UpdateRenderTargets(false); - - const auto shaders = pipeline_cache.GetShaders(); - graphics_key.shaders = GetShaderAddresses(shaders); - - SetupShaderDescriptors(shaders, is_indexed); - - const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); - graphics_key.renderpass = framebuffer->RenderPass(); - - VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( - graphics_key, framebuffer->NumColorBuffers(), async_shaders); - if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { - // Async graphics pipeline was not ready. - return; - } - - BeginTransformFeedback(); - - scheduler.RequestRenderpass(framebuffer); - scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - UpdateDynamicStates(); - - const auto& regs = maxwell3d.regs; - const u32 num_instances = maxwell3d.mme_draw.instance_count; - const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); - const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); - const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); - scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); - } else { - cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, - draw_params.base_vertex, draw_params.base_instance); - } - }); - - EndTransformFeedback(); + UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); } void RasterizerVulkan::Clear() { @@ -395,73 +236,8 @@ void RasterizerVulkan::Clear() { }); } -void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { - MICROPROFILE_SCOPE(Vulkan_Compute); - - query_cache.UpdateCounters(); - - const auto& launch_desc = kepler_compute.launch_description; - auto& pipeline = pipeline_cache.GetComputePipeline({ - .shader = code_addr, - .shared_memory_size = launch_desc.shared_alloc, - .workgroup_size{ - launch_desc.block_dim_x, - launch_desc.block_dim_y, - launch_desc.block_dim_z, - }, - }); - - // Compute dispatches can't be executed inside a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - image_view_indices.clear(); - sampler_handles.clear(); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - - const auto& entries = pipeline.GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeUniformTexels(entries); - SetupComputeTextures(entries); - SetupComputeStorageTexels(entries); - SetupComputeImages(entries); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - update_descriptor_queue.Acquire(); - - buffer_cache.BindHostComputeBuffers(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, - sampler_ptr); - - const VkPipeline pipeline_handle = pipeline.GetHandle(); - const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); - const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); - scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, - grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, - descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - if (descriptor_set) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, - DESCRIPTOR_SET, descriptor_set, nullptr); - } - cmdbuf.Dispatch(grid_x, grid_y, grid_z); - }); +void RasterizerVulkan::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { @@ -716,52 +492,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 return buffer_cache.DMACopy(src_address, dest_address, amount); } -void RasterizerVulkan::SetupShaderDescriptors( - const std::array& shaders, bool is_indexed) { - image_view_indices.clear(); - sampler_handles.clear(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - const ShaderEntries& entries = shader->GetEntries(); - SetupGraphicsUniformTexels(entries, stage); - SetupGraphicsTextures(entries, stage); - SetupGraphicsStorageTexels(entries, stage); - SetupGraphicsImages(entries, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_buffers) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - update_descriptor_queue.Acquire(); - - ImageViewId* image_view_id_ptr = image_view_ids.data(); - VkSampler* sampler_ptr = sampler_handles.data(); - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - // Skip VertexA stage - Shader* const shader = shaders[stage + 1]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, - image_view_id_ptr, sampler_ptr); - } -} - void RasterizerVulkan::UpdateDynamicStates() { auto& regs = maxwell3d.regs; UpdateViewportsState(regs); @@ -810,89 +540,6 @@ void RasterizerVulkan::EndTransformFeedback() { [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); } -void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = - GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { - const auto& regs = maxwell3d.regs; - const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : entries.images) { - const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.uniform_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.samplers) { - for (size_t index = 0; index < entry.size; ++index) { - const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, - COMPUTE_SHADER_INDEX, index); - image_view_indices.push_back(handle.image); - - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - } - } -} - -void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.storage_texels) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : entries.images) { - const TextureHandle handle = - GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); - image_view_indices.push_back(handle.image); - } -} - void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchViewports()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..31017dc2b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -28,7 +28,6 @@ #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" -#include "video_core/shader/async_shaders.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -73,7 +72,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -103,19 +102,6 @@ public: bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - - /// Maximum supported size that a constbuffer can have in bytes. - static constexpr size_t MaxConstbufferSize = 0x10000; - static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, - "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; @@ -125,40 +111,12 @@ private: void FlushWork(); - /// Setup descriptors in the graphics pipeline. - void SetupShaderDescriptors(const std::array& shaders, - bool is_indexed); - void UpdateDynamicStates(); void BeginTransformFeedback(); void EndTransformFeedback(); - /// Setup uniform texels in the graphics pipeline. - void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup textures in the graphics pipeline. - void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); - - /// Setup storage texels in the graphics pipeline. - void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); - - /// Setup images in the graphics pipeline. - void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); - - /// Setup texel buffers in the compute pipeline. - void SetupComputeUniformTexels(const ShaderEntries& entries); - - /// Setup textures in the compute pipeline. - void SetupComputeTextures(const ShaderEntries& entries); - - /// Setup storage texels in the compute pipeline. - void SetupComputeStorageTexels(const ShaderEntries& entries); - - /// Setup images in the compute pipeline. - void SetupComputeImages(const ShaderEntries& entries); - void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); @@ -198,13 +156,12 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - VKPipelineCache pipeline_cache; + PipelineCache pipeline_cache; VKQueryCache query_cache; AccelerateDMA accelerate_dma; VKFenceManager fence_manager; vk::Event wfi_event; - VideoCommon::Shader::AsyncShaders async_shaders; boost::container::static_vector image_view_indices; std::array image_view_ids; diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp deleted file mode 100644 index db11144c7..000000000 --- a/src/video_core/shader/ast.cpp +++ /dev/null @@ -1,752 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { - -ASTZipper::ASTZipper() = default; - -void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { - ASSERT(new_first->manager == nullptr); - first = new_first; - last = new_first; - - ASTNode current = first; - while (current) { - current->manager = this; - current->parent = parent; - last = current; - current = current->next; - } -} - -void ASTZipper::PushBack(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous = last; - if (last) { - last->next = new_node; - } - new_node->next.reset(); - last = new_node; - if (!first) { - first = new_node; - } - new_node->manager = this; -} - -void ASTZipper::PushFront(const ASTNode new_node) { - ASSERT(new_node->manager == nullptr); - new_node->previous.reset(); - new_node->next = first; - if (first) { - first->previous = new_node; - } - if (last == first) { - last = new_node; - } - first = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushFront(new_node); - return; - } - const ASTNode next = at_node->next; - if (next) { - next->previous = new_node; - } - new_node->previous = at_node; - if (at_node == last) { - last = new_node; - } - new_node->next = next; - at_node->next = new_node; - new_node->manager = this; -} - -void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { - ASSERT(new_node->manager == nullptr); - if (!at_node) { - PushBack(new_node); - return; - } - const ASTNode previous = at_node->previous; - if (previous) { - previous->next = new_node; - } - new_node->next = at_node; - if (at_node == first) { - first = new_node; - } - new_node->previous = previous; - at_node->previous = new_node; - new_node->manager = this; -} - -void ASTZipper::DetachTail(ASTNode node) { - ASSERT(node->manager == this); - if (node == first) { - first.reset(); - last.reset(); - return; - } - - last = node->previous; - last->next.reset(); - node->previous.reset(); - - ASTNode current = std::move(node); - while (current) { - current->manager = nullptr; - current->parent.reset(); - current = current->next; - } -} - -void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { - ASSERT(start->manager == this && end->manager == this); - if (start == end) { - DetachSingle(start); - return; - } - const ASTNode prev = start->previous; - const ASTNode post = end->next; - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - start->previous.reset(); - end->next.reset(); - ASTNode current = start; - bool found = false; - while (current) { - current->manager = nullptr; - current->parent.reset(); - found |= current == end; - current = current->next; - } - ASSERT(found); -} - -void ASTZipper::DetachSingle(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode prev = node->previous; - const ASTNode post = node->next; - node->previous.reset(); - node->next.reset(); - if (!prev) { - first = post; - } else { - prev->next = post; - } - if (!post) { - last = prev; - } else { - post->previous = prev; - } - - node->manager = nullptr; - node->parent.reset(); -} - -void ASTZipper::Remove(const ASTNode node) { - ASSERT(node->manager == this); - const ASTNode next = node->next; - const ASTNode previous = node->previous; - if (previous) { - previous->next = next; - } - if (next) { - next->previous = previous; - } - node->parent.reset(); - node->manager = nullptr; - if (node == last) { - last = previous; - } - if (node == first) { - first = next; - } -} - -class ExprPrinter final { -public: - void operator()(const ExprAnd& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += "( "; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += "!"; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - inner += fmt::format("P{}", expr.predicate); - } - - void operator()(const ExprCondCode& expr) { - inner += fmt::format("CC{}", expr.cc); - } - - void operator()(const ExprVar& expr) { - inner += fmt::format("V{}", expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? "true" : "false"; - } - - void operator()(const ExprGprEqual& expr) { - inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string inner; -}; - -class ASTPrinter { -public: - void operator()(const ASTProgram& ast) { - scope++; - inner += "program {\n"; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - inner += "}\n"; - scope--; - } - - void operator()(const ASTIfThen& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}}\n", Indent()); - } - - void operator()(const ASTIfElse& ast) { - inner += Indent(); - inner += "else {\n"; - - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - - inner += Indent(); - inner += "}\n"; - } - - void operator()(const ASTBlockEncoded& ast) { - inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); - } - - void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { - inner += Indent(); - inner += "Block;\n"; - } - - void operator()(const ASTVarSet& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - inner += fmt::format("Label_{}:\n", ast.index); - } - - void operator()(const ASTGoto& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += - fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); - } - - void operator()(const ASTDoWhile& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}do {{\n", Indent()); - scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - scope--; - inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), - ast.kills ? "discard" : "exit"); - } - - void operator()(const ASTBreak& ast) { - ExprPrinter expr_parser{}; - std::visit(expr_parser, *ast.condition); - inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - - const std::string& GetResult() const { - return inner; - } - -private: - std::string_view Indent() { - if (space_segment_scope == scope) { - return space_segment; - } - - // Ensure that we don't exceed our view. - ASSERT(scope * 2 < spaces.size()); - - space_segment = spaces.substr(0, scope * 2); - space_segment_scope = scope; - return space_segment; - } - - std::string inner{}; - std::string_view space_segment; - - u32 scope{}; - u32 space_segment_scope{}; - - static constexpr std::string_view spaces{" "}; -}; - -std::string ASTManager::Print() const { - ASTPrinter printer{}; - printer.Visit(main_node); - return printer.GetResult(); -} - -ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_) - : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {} - -ASTManager::~ASTManager() { - Clear(); -} - -void ASTManager::Init() { - main_node = ASTBase::Make(ASTNode{}); - program = std::get_if(main_node->GetInnerData()); - false_condition = MakeExpr(false); -} - -void ASTManager::DeclareLabel(u32 address) { - const auto pair = labels_map.emplace(address, labels_count); - if (pair.second) { - labels_count++; - labels.resize(labels_count); - } -} - -void ASTManager::InsertLabel(u32 address) { - const u32 index = labels_map[address]; - const ASTNode label = ASTBase::Make(main_node, index); - labels[index] = label; - program->nodes.PushBack(label); -} - -void ASTManager::InsertGoto(Expr condition, u32 address) { - const u32 index = labels_map[address]; - const ASTNode goto_node = ASTBase::Make(main_node, std::move(condition), index); - gotos.push_back(goto_node); - program->nodes.PushBack(goto_node); -} - -void ASTManager::InsertBlock(u32 start_address, u32 end_address) { - ASTNode block = ASTBase::Make(main_node, start_address, end_address); - program->nodes.PushBack(std::move(block)); -} - -void ASTManager::InsertReturn(Expr condition, bool kills) { - ASTNode node = ASTBase::Make(main_node, std::move(condition), kills); - program->nodes.PushBack(std::move(node)); -} - -// The decompile algorithm is based on -// "Taming control flow: A structured approach to eliminating goto statements" -// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be -// on the same structured level as the label which they jump to. This is done, -// through outward/inward movements and lifting. Once they are at the same -// level, you can enclose them in an "if" structure or a "do-while" structure. -void ASTManager::Decompile() { - auto it = gotos.begin(); - while (it != gotos.end()) { - const ASTNode goto_node = *it; - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - const ASTNode label = labels[*label_index]; - if (!full_decompile) { - // We only decompile backward jumps - if (!IsBackwardsJump(goto_node, label)) { - it++; - continue; - } - } - if (IndirectlyRelated(goto_node, label)) { - while (!DirectlyRelated(goto_node, label)) { - MoveOutward(goto_node); - } - } - if (DirectlyRelated(goto_node, label)) { - u32 goto_level = goto_node->GetLevel(); - const u32 label_level = label->GetLevel(); - while (label_level < goto_level) { - MoveOutward(goto_node); - goto_level--; - } - // TODO(Blinkhawk): Implement Lifting and Inward Movements - } - if (label->GetParent() == goto_node->GetParent()) { - bool is_loop = false; - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label) { - is_loop = true; - break; - } - current = current->GetPrevious(); - } - - if (is_loop) { - EncloseDoWhile(goto_node, label); - } else { - EncloseIfThen(goto_node, label); - } - it = gotos.erase(it); - continue; - } - it++; - } - if (full_decompile) { - for (const ASTNode& label : labels) { - auto& manager = label->GetManager(); - manager.Remove(label); - } - labels.clear(); - } else { - auto label_it = labels.begin(); - while (label_it != labels.end()) { - bool can_remove = true; - ASTNode label = *label_it; - for (const ASTNode& goto_node : gotos) { - const auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return; - } - ASTNode& glabel = labels[*label_index]; - if (glabel == label) { - can_remove = false; - break; - } - } - if (can_remove) { - label->MarkLabelUnused(); - } - } - } -} - -bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { - u32 goto_level = goto_node->GetLevel(); - u32 label_level = label_node->GetLevel(); - while (goto_level > label_level) { - goto_level--; - goto_node = goto_node->GetParent(); - } - while (label_level > goto_level) { - label_level--; - label_node = label_node->GetParent(); - } - while (goto_node->GetParent() != label_node->GetParent()) { - goto_node = goto_node->GetParent(); - label_node = label_node->GetParent(); - } - ASTNode current = goto_node->GetPrevious(); - while (current) { - if (current == label_node) { - return true; - } - current = current->GetPrevious(); - } - return false; -} - -bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { - return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); -} - -bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { - if (first->GetParent() == second->GetParent()) { - return false; - } - const u32 first_level = first->GetLevel(); - const u32 second_level = second->GetLevel(); - u32 min_level; - u32 max_level; - ASTNode max; - ASTNode min; - if (first_level > second_level) { - min_level = second_level; - min = second; - max_level = first_level; - max = first; - } else { - min_level = first_level; - min = first; - max_level = second_level; - max = second; - } - - while (max_level > min_level) { - max_level--; - max = max->GetParent(); - } - - return min->GetParent() == max->GetParent(); -} - -void ASTManager::ShowCurrentState(std::string_view state) const { - LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); - SanityCheck(); -} - -void ASTManager::SanityCheck() const { - for (const auto& label : labels) { - if (!label->GetParent()) { - LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); - } - } -} - -void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode loop_start = label->GetNext(); - if (loop_start == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode parent = label->GetParent(); - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSegment(loop_start, goto_node); - const ASTNode do_while_node = ASTBase::Make(parent, condition); - ASTZipper* sub_zipper = do_while_node->GetSubNodes(); - sub_zipper->Init(loop_start, do_while_node); - zipper.InsertAfter(do_while_node, label); - sub_zipper->Remove(goto_node); -} - -void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode if_end = label->GetPrevious(); - if (if_end == goto_node) { - zipper.Remove(goto_node); - return; - } - const ASTNode prev = goto_node->GetPrevious(); - const Expr condition = goto_node->GetGotoCondition(); - bool do_else = false; - if (!disable_else_derivation && prev->IsIfThen()) { - const Expr if_condition = prev->GetIfCondition(); - do_else = ExprAreEqual(if_condition, condition); - } - const ASTNode parent = label->GetParent(); - zipper.DetachSegment(goto_node, if_end); - ASTNode if_node; - if (do_else) { - if_node = ASTBase::Make(parent); - } else { - Expr neg_condition = MakeExprNot(condition); - if_node = ASTBase::Make(parent, neg_condition); - } - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(goto_node, if_node); - zipper.InsertAfter(if_node, prev); - sub_zipper->Remove(goto_node); -} - -void ASTManager::MoveOutward(ASTNode goto_node) { - ASTZipper& zipper = goto_node->GetManager(); - const ASTNode parent = goto_node->GetParent(); - ASTZipper& zipper2 = parent->GetManager(); - const ASTNode grandpa = parent->GetParent(); - const bool is_loop = parent->IsLoop(); - const bool is_else = parent->IsIfElse(); - const bool is_if = parent->IsIfThen(); - - const ASTNode prev = goto_node->GetPrevious(); - const ASTNode post = goto_node->GetNext(); - - const Expr condition = goto_node->GetGotoCondition(); - zipper.DetachSingle(goto_node); - if (is_loop) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr(var_index); - const ASTNode var_node = ASTBase::Make(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make(parent, var_index, false_condition); - zipper2.InsertBefore(var_node_init, parent); - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - const ASTNode break_node = ASTBase::Make(parent, var_condition); - zipper.InsertAfter(break_node, var_node); - } else if (is_if || is_else) { - const u32 var_index = NewVariable(); - const Expr var_condition = MakeExpr(var_index); - const ASTNode var_node = ASTBase::Make(parent, var_index, condition); - const ASTNode var_node_init = ASTBase::Make(parent, var_index, false_condition); - if (is_if) { - zipper2.InsertBefore(var_node_init, parent); - } else { - zipper2.InsertBefore(var_node_init, parent->GetPrevious()); - } - zipper.InsertAfter(var_node, prev); - goto_node->SetGotoCondition(var_condition); - if (post) { - zipper.DetachTail(post); - const ASTNode if_node = ASTBase::Make(parent, MakeExprNot(var_condition)); - ASTZipper* sub_zipper = if_node->GetSubNodes(); - sub_zipper->Init(post, if_node); - zipper.InsertAfter(if_node, var_node); - } - } else { - UNREACHABLE(); - } - const ASTNode next = parent->GetNext(); - if (is_if && next && next->IsIfElse()) { - zipper2.InsertAfter(goto_node, next); - goto_node->SetParent(grandpa); - return; - } - zipper2.InsertAfter(goto_node, parent); - goto_node->SetParent(grandpa); -} - -class ASTClearer { -public: - ASTClearer() = default; - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) { - ast.nodes.clear(); - } - - void operator()([[maybe_unused]] const ASTVarSet& ast) {} - - void operator()([[maybe_unused]] const ASTLabel& ast) {} - - void operator()([[maybe_unused]] const ASTGoto& ast) {} - - void operator()(const ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()([[maybe_unused]] const ASTReturn& ast) {} - - void operator()([[maybe_unused]] const ASTBreak& ast) {} - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - node->Clear(); - } -}; - -void ASTManager::Clear() { - if (!main_node) { - return; - } - ASTClearer clearer{}; - clearer.Visit(main_node); - main_node.reset(); - program = nullptr; - labels_map.clear(); - labels.clear(); - gotos.clear(); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h deleted file mode 100644 index dc49b369e..000000000 --- a/src/video_core/shader/ast.h +++ /dev/null @@ -1,398 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "video_core/shader/expr.h" -#include "video_core/shader/node.h" - -namespace VideoCommon::Shader { - -class ASTBase; -class ASTBlockDecoded; -class ASTBlockEncoded; -class ASTBreak; -class ASTDoWhile; -class ASTGoto; -class ASTIfElse; -class ASTIfThen; -class ASTLabel; -class ASTProgram; -class ASTReturn; -class ASTVarSet; - -using ASTData = std::variant; - -using ASTNode = std::shared_ptr; - -enum class ASTZipperType : u32 { - Program, - IfThen, - IfElse, - Loop, -}; - -class ASTZipper final { -public: - explicit ASTZipper(); - - void Init(ASTNode first, ASTNode parent); - - ASTNode GetFirst() const { - return first; - } - - ASTNode GetLast() const { - return last; - } - - void PushBack(ASTNode new_node); - void PushFront(ASTNode new_node); - void InsertAfter(ASTNode new_node, ASTNode at_node); - void InsertBefore(ASTNode new_node, ASTNode at_node); - void DetachTail(ASTNode node); - void DetachSingle(ASTNode node); - void DetachSegment(ASTNode start, ASTNode end); - void Remove(ASTNode node); - - ASTNode first; - ASTNode last; -}; - -class ASTProgram { -public: - ASTZipper nodes{}; -}; - -class ASTIfThen { -public: - explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTIfElse { -public: - ASTZipper nodes{}; -}; - -class ASTBlockEncoded { -public: - explicit ASTBlockEncoded(u32 start_, u32 _) : start{start_}, end{_} {} - u32 start; - u32 end; -}; - -class ASTBlockDecoded { -public: - explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {} - NodeBlock nodes; -}; - -class ASTVarSet { -public: - explicit ASTVarSet(u32 index_, Expr condition_) - : index{index_}, condition{std::move(condition_)} {} - - u32 index; - Expr condition; -}; - -class ASTLabel { -public: - explicit ASTLabel(u32 index_) : index{index_} {} - u32 index; - bool unused{}; -}; - -class ASTGoto { -public: - explicit ASTGoto(Expr condition_, u32 label_) - : condition{std::move(condition_)}, label{label_} {} - - Expr condition; - u32 label; -}; - -class ASTDoWhile { -public: - explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; - ASTZipper nodes{}; -}; - -class ASTReturn { -public: - explicit ASTReturn(Expr condition_, bool kills_) - : condition{std::move(condition_)}, kills{kills_} {} - - Expr condition; - bool kills; -}; - -class ASTBreak { -public: - explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {} - Expr condition; -}; - -class ASTBase { -public: - explicit ASTBase(ASTNode parent_, ASTData data_) - : data{std::move(data_)}, parent{std::move(parent_)} {} - - template - static ASTNode Make(ASTNode parent, Args&&... args) { - return std::make_shared(std::move(parent), - ASTData(U(std::forward(args)...))); - } - - void SetParent(ASTNode new_parent) { - parent = std::move(new_parent); - } - - ASTNode& GetParent() { - return parent; - } - - const ASTNode& GetParent() const { - return parent; - } - - u32 GetLevel() const { - u32 level = 0; - auto next_parent = parent; - while (next_parent) { - next_parent = next_parent->GetParent(); - level++; - } - return level; - } - - ASTData* GetInnerData() { - return &data; - } - - const ASTData* GetInnerData() const { - return &data; - } - - ASTNode GetNext() const { - return next; - } - - ASTNode GetPrevious() const { - return previous; - } - - ASTZipper& GetManager() { - return *manager; - } - - const ASTZipper& GetManager() const { - return *manager; - } - - std::optional GetGotoLabel() const { - if (const auto* inner = std::get_if(&data)) { - return {inner->label}; - } - return std::nullopt; - } - - Expr GetGotoCondition() const { - if (const auto* inner = std::get_if(&data)) { - return inner->condition; - } - return nullptr; - } - - void MarkLabelUnused() { - if (auto* inner = std::get_if(&data)) { - inner->unused = true; - } - } - - bool IsLabelUnused() const { - if (const auto* inner = std::get_if(&data)) { - return inner->unused; - } - return true; - } - - std::optional GetLabelIndex() const { - if (const auto* inner = std::get_if(&data)) { - return {inner->index}; - } - return std::nullopt; - } - - Expr GetIfCondition() const { - if (const auto* inner = std::get_if(&data)) { - return inner->condition; - } - return nullptr; - } - - void SetGotoCondition(Expr new_condition) { - if (auto* inner = std::get_if(&data)) { - inner->condition = std::move(new_condition); - } - } - - bool IsIfThen() const { - return std::holds_alternative(data); - } - - bool IsIfElse() const { - return std::holds_alternative(data); - } - - bool IsBlockEncoded() const { - return std::holds_alternative(data); - } - - void TransformBlockEncoded(NodeBlock&& nodes) { - data = ASTBlockDecoded(std::move(nodes)); - } - - bool IsLoop() const { - return std::holds_alternative(data); - } - - ASTZipper* GetSubNodes() { - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - if (std::holds_alternative(data)) { - return &std::get_if(&data)->nodes; - } - return nullptr; - } - - void Clear() { - next.reset(); - previous.reset(); - parent.reset(); - manager = nullptr; - } - -private: - friend class ASTZipper; - - ASTData data; - ASTNode parent; - ASTNode next; - ASTNode previous; - ASTZipper* manager{}; -}; - -class ASTManager final { -public: - explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_); - ~ASTManager(); - - ASTManager(const ASTManager& o) = delete; - ASTManager& operator=(const ASTManager& other) = delete; - - ASTManager(ASTManager&& other) noexcept = default; - ASTManager& operator=(ASTManager&& other) noexcept = default; - - void Init(); - - void DeclareLabel(u32 address); - - void InsertLabel(u32 address); - - void InsertGoto(Expr condition, u32 address); - - void InsertBlock(u32 start_address, u32 end_address); - - void InsertReturn(Expr condition, bool kills); - - std::string Print() const; - - void Decompile(); - - void ShowCurrentState(std::string_view state) const; - - void SanityCheck() const; - - void Clear(); - - bool IsFullyDecompiled() const { - if (full_decompile) { - return gotos.empty(); - } - - for (ASTNode goto_node : gotos) { - auto label_index = goto_node->GetGotoLabel(); - if (!label_index) { - return false; - } - ASTNode glabel = labels[*label_index]; - if (IsBackwardsJump(goto_node, glabel)) { - return false; - } - } - return true; - } - - ASTNode GetProgram() const { - return main_node; - } - - u32 GetVariables() const { - return variables; - } - - const std::vector& GetLabels() const { - return labels; - } - -private: - bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; - - bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const; - - void EncloseDoWhile(ASTNode goto_node, ASTNode label); - - void EncloseIfThen(ASTNode goto_node, ASTNode label); - - void MoveOutward(ASTNode goto_node); - - u32 NewVariable() { - return variables++; - } - - bool full_decompile{}; - bool disable_else_derivation{}; - std::unordered_map labels_map{}; - u32 labels_count{}; - std::vector labels{}; - std::list gotos{}; - u32 variables{}; - ASTProgram* program{}; - ASTNode main_node{}; - Expr false_condition{}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp deleted file mode 100644 index 02adcf9c7..000000000 --- a/src/video_core/shader/async_shaders.cpp +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_base.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/shader/async_shaders.h" - -namespace VideoCommon::Shader { - -AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {} - -AsyncShaders::~AsyncShaders() { - KillWorkers(); -} - -void AsyncShaders::AllocateWorkers() { - // Use at least one thread - u32 num_workers = 1; - - // Deduce how many more threads we can use - const u32 thread_count = std::thread::hardware_concurrency(); - if (thread_count >= 8) { - // Increase async workers by 1 for every 2 threads >= 8 - num_workers += 1 + (thread_count - 8) / 2; - } - - // If we already have workers queued, ignore - if (num_workers == worker_threads.size()) { - return; - } - - // If workers already exist, clear them - if (!worker_threads.empty()) { - FreeWorkers(); - } - - // Create workers - for (std::size_t i = 0; i < num_workers; i++) { - context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, - context_list[i].get()); - } -} - -void AsyncShaders::FreeWorkers() { - // Mark all threads to quit - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.join(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -void AsyncShaders::KillWorkers() { - is_thread_exiting.store(true); - cv.notify_all(); - for (auto& thread : worker_threads) { - thread.detach(); - } - // Clear our shared contexts - context_list.clear(); - - // Clear our worker threads - worker_threads.clear(); -} - -bool AsyncShaders::HasWorkQueued() const { - return !pending_queue.empty(); -} - -bool AsyncShaders::HasCompletedWork() const { - std::shared_lock lock{completed_mutex}; - return !finished_work.empty(); -} - -bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { - const auto& regs = gpu.Maxwell3D().regs; - - // If something is using depth, we can assume that games are not rendering anything which will - // be used one time. - if (regs.zeta_enable) { - return true; - } - - // If games are using a small index count, we can assume these are full screen quads. Usually - // these shaders are only used once for building textures so we can assume they can't be built - // async - if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { - return false; - } - - return true; -} - -std::vector AsyncShaders::GetCompletedWork() { - std::vector results; - { - std::unique_lock lock{completed_mutex}; - results = std::move(finished_work); - finished_work.clear(); - } - return results; -} - -void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, - Tegra::Engines::ShaderType shader_type, u64 uid, - std::vector code, std::vector code_b, - u32 main_offset, CompilerSettings compiler_settings, - const Registry& registry, VAddr cpu_addr) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, - .device = &device, - .shader_type = shader_type, - .uid = uid, - .code = std::move(code), - .code_b = std::move(code_b), - .main_offset = main_offset, - .compiler_settings = compiler_settings, - .registry = registry, - .cpu_address = cpu_addr, - .pp_cache = nullptr, - .vk_device = nullptr, - .scheduler = nullptr, - .descriptor_pool = nullptr, - .update_descriptor_queue = nullptr, - .bindings{}, - .program{}, - .key{}, - .num_color_buffers = 0, - }); - cv.notify_one(); -} - -void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, - const Vulkan::Device& device, Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector bindings, - Vulkan::SPIRVProgram program, - Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) { - std::unique_lock lock(queue_mutex); - pending_queue.push({ - .backend = Backend::Vulkan, - .device = nullptr, - .shader_type{}, - .uid = 0, - .code{}, - .code_b{}, - .main_offset = 0, - .compiler_settings{}, - .registry{}, - .cpu_address = 0, - .pp_cache = pp_cache, - .vk_device = &device, - .scheduler = &scheduler, - .descriptor_pool = &descriptor_pool, - .update_descriptor_queue = &update_descriptor_queue, - .bindings = std::move(bindings), - .program = std::move(program), - .key = key, - .num_color_buffers = num_color_buffers, - }); - cv.notify_one(); -} - -void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { - while (!is_thread_exiting.load(std::memory_order_relaxed)) { - std::unique_lock lock{queue_mutex}; - cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); - if (is_thread_exiting) { - return; - } - - // Partial lock to allow all threads to read at the same time - if (!HasWorkQueued()) { - continue; - } - // Another thread beat us, just unlock and wait for the next load - if (pending_queue.empty()) { - continue; - } - - // Pull work from queue - WorkerParams work = std::move(pending_queue.front()); - pending_queue.pop(); - lock.unlock(); - - if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { - const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); - const auto scope = context->Acquire(); - auto program = - OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); - Result result{}; - result.backend = work.backend; - result.cpu_address = work.cpu_address; - result.uid = work.uid; - result.code = std::move(work.code); - result.code_b = std::move(work.code_b); - result.shader_type = work.shader_type; - - if (work.backend == Backend::OpenGL) { - result.program.opengl = std::move(program->source_program); - } else if (work.backend == Backend::GLASM) { - result.program.glasm = std::move(program->assembly_program); - } - - { - std::unique_lock complete_lock(completed_mutex); - finished_work.push_back(std::move(result)); - } - } else if (work.backend == Backend::Vulkan) { - auto pipeline = std::make_unique( - *work.vk_device, *work.scheduler, *work.descriptor_pool, - *work.update_descriptor_queue, work.key, work.bindings, work.program, - work.num_color_buffers); - - work.pp_cache->EmplacePipeline(std::move(pipeline)); - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h deleted file mode 100644 index 7fdff6e56..000000000 --- a/src/video_core/shader/async_shaders.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include - -#include - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_vulkan/vk_pipeline_cache.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Core::Frontend { -class EmuWindow; -class GraphicsContext; -} // namespace Core::Frontend - -namespace Tegra { -class GPU; -} - -namespace Vulkan { -class VKPipelineCache; -} - -namespace VideoCommon::Shader { - -class AsyncShaders { -public: - enum class Backend { - OpenGL, - GLASM, - Vulkan, - }; - - struct ResultPrograms { - OpenGL::OGLProgram opengl; - OpenGL::OGLAssemblyProgram glasm; - }; - - struct Result { - u64 uid; - VAddr cpu_address; - Backend backend; - ResultPrograms program; - std::vector code; - std::vector code_b; - Tegra::Engines::ShaderType shader_type; - }; - - explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_); - ~AsyncShaders(); - - /// Start up shader worker threads - void AllocateWorkers(); - - /// Clear the shader queue and kill all worker threads - void FreeWorkers(); - - // Force end all threads - void KillWorkers(); - - /// Check to see if any shaders have actually been compiled - [[nodiscard]] bool HasCompletedWork() const; - - /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build - /// every shader async as some shaders are only built and executed once. We try to "guess" which - /// shader would be used only once - [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; - - /// Pulls completed compiled shaders - [[nodiscard]] std::vector GetCompletedWork(); - - void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, - u64 uid, std::vector code, std::vector code_b, u32 main_offset, - CompilerSettings compiler_settings, const Registry& registry, - VAddr cpu_addr); - - void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device, - Vulkan::VKScheduler& scheduler, - Vulkan::VKDescriptorPool& descriptor_pool, - Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, - std::vector bindings, - Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key, - u32 num_color_buffers); - -private: - void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); - - /// Check our worker queue to see if we have any work queued already - [[nodiscard]] bool HasWorkQueued() const; - - struct WorkerParams { - Backend backend; - // For OGL - const OpenGL::Device* device; - Tegra::Engines::ShaderType shader_type; - u64 uid; - std::vector code; - std::vector code_b; - u32 main_offset; - CompilerSettings compiler_settings; - std::optional registry; - VAddr cpu_address; - - // For Vulkan - Vulkan::VKPipelineCache* pp_cache; - const Vulkan::Device* vk_device; - Vulkan::VKScheduler* scheduler; - Vulkan::VKDescriptorPool* descriptor_pool; - Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; - std::vector bindings; - Vulkan::SPIRVProgram program; - Vulkan::GraphicsPipelineCacheKey key; - u32 num_color_buffers; - }; - - std::condition_variable cv; - mutable std::mutex queue_mutex; - mutable std::shared_mutex completed_mutex; - std::atomic is_thread_exiting{}; - std::vector> context_list; - std::vector worker_threads; - std::queue pending_queue; - std::vector finished_work; - Core::Frontend::EmuWindow& emu_window; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp deleted file mode 100644 index cddcbd4f0..000000000 --- a/src/video_core/shader/compiler_settings.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/shader/compiler_settings.h" - -namespace VideoCommon::Shader { - -std::string CompileDepthAsString(const CompileDepth cd) { - switch (cd) { - case CompileDepth::BruteForce: - return "Brute Force Compile"; - case CompileDepth::FlowStack: - return "Simple Flow Stack Mode"; - case CompileDepth::NoFlowStack: - return "Remove Flow Stack"; - case CompileDepth::DecompileBackwards: - return "Decompile Backward Jumps"; - case CompileDepth::FullDecompile: - return "Full Decompilation"; - default: - return "Unknown Compiler Process"; - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h deleted file mode 100644 index 916018c01..000000000 --- a/src/video_core/shader/compiler_settings.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class CompileDepth : u32 { - BruteForce = 0, - FlowStack = 1, - NoFlowStack = 2, - DecompileBackwards = 3, - FullDecompile = 4, -}; - -std::string CompileDepthAsString(CompileDepth cd); - -struct CompilerSettings { - CompileDepth depth{CompileDepth::NoFlowStack}; - bool disable_else_derivation{true}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp deleted file mode 100644 index 43d965f2f..000000000 --- a/src/video_core/shader/control_flow.cpp +++ /dev/null @@ -1,751 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -constexpr s32 unassigned_branch = -2; - -struct Query { - u32 address{}; - std::stack ssy_stack{}; - std::stack pbk_stack{}; -}; - -struct BlockStack { - BlockStack() = default; - explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} - std::stack ssy_stack{}; - std::stack pbk_stack{}; -}; - -template -BlockBranchInfo MakeBranchInfo(Args&&... args) { - static_assert(std::is_convertible_v); - return std::make_shared(T(std::forward(args)...)); -} - -bool BlockBranchIsIgnored(BlockBranchInfo first) { - bool ignore = false; - if (std::holds_alternative(*first)) { - const auto branch = std::get_if(first.get()); - ignore = branch->ignore; - } - return ignore; -} - -struct BlockInfo { - u32 start{}; - u32 end{}; - bool visited{}; - BlockBranchInfo branch{}; - - bool IsInside(const u32 address) const { - return start <= address && address <= end; - } -}; - -struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) - : program_code{program_code_}, registry{registry_}, start{start_} {} - - const ProgramCode& program_code; - Registry& registry; - u32 start{}; - std::vector block_info; - std::list inspect_queries; - std::list queries; - std::unordered_map registered; - std::set labels; - std::map ssy_labels; - std::map pbk_labels; - std::unordered_map stacks; - ASTManager* manager{}; -}; - -enum class BlockCollision : u32 { None, Found, Inside }; - -std::pair TryGetBlock(CFGRebuildState& state, u32 address) { - const auto& blocks = state.block_info; - for (u32 index = 0; index < blocks.size(); index++) { - if (blocks[index].start == address) { - return {BlockCollision::Found, index}; - } - if (blocks[index].IsInside(address)) { - return {BlockCollision::Inside, index}; - } - } - return {BlockCollision::None, 0xFFFFFFFF}; -} - -struct ParseInfo { - BlockBranchInfo branch_info{}; - u32 end_address{}; -}; - -BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { - auto& it = state.block_info.emplace_back(); - it.start = start; - it.end = end; - const u32 index = static_cast(state.block_info.size() - 1); - state.registered.insert({start, index}); - return it; -} - -Pred GetPredicate(u32 index, bool negated) { - return static_cast(static_cast(index) + (negated ? 8ULL : 0ULL)); -} - -enum class ParseResult : u32 { - ControlCaught, - BlockEnd, - AbnormalFlow, -}; - -struct BranchIndirectInfo { - u32 buffer{}; - u32 offset{}; - u32 entries{}; - s32 relative_position{}; -}; - -struct BufferInfo { - u32 index; - u32 offset; -}; - -std::optional> GetBRXInfo(const CFGRebuildState& state, u32& pos) { - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() != OpCode::Id::BRX) { - return std::nullopt; - } - if (instr.brx.constant_buffer != 0) { - return std::nullopt; - } - --pos; - return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); -} - -template -// requires std::predicate -// requires std::invocable -std::optional TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, - PackCallable pack) { - for (; pos >= state.start; --pos) { - if (IsSchedInstruction(pos, state.start)) { - continue; - } - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (!opcode) { - continue; - } - if (test(instr, opcode->get())) { - --pos; - return std::make_optional(pack(instr, opcode->get())); - } - } - return std::nullopt; -} - -std::optional> TrackLDC(const CFGRebuildState& state, u32& pos, - u64 brx_tracked_register) { - return TrackInstruction>( - state, pos, - [brx_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::LD_C && - instr.gpr0.Value() == brx_tracked_register && - instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; - }, - [](auto instr, const auto& opcode) { - const BufferInfo info = {static_cast(instr.cbuf36.index.Value()), - static_cast(instr.cbuf36.GetOffset())}; - return std::make_pair(info, instr.gpr8.Value()); - }); -} - -std::optional TrackSHLRegister(const CFGRebuildState& state, u32& pos, - u64 ldc_tracked_register) { - return TrackInstruction( - state, pos, - [ldc_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::SHL_IMM && - instr.gpr0.Value() == ldc_tracked_register; - }, - [](auto instr, const auto&) { return instr.gpr8.Value(); }); -} - -std::optional TrackIMNMXValue(const CFGRebuildState& state, u32& pos, - u64 shl_tracked_register) { - return TrackInstruction( - state, pos, - [shl_tracked_register](auto instr, const auto& opcode) { - return opcode.GetId() == OpCode::Id::IMNMX_IMM && - instr.gpr0.Value() == shl_tracked_register; - }, - [](auto instr, const auto&) { - return static_cast(instr.alu.GetSignedImm20_20() + 1); - }); -} - -std::optional TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { - const auto brx_info = GetBRXInfo(state, pos); - if (!brx_info) { - return std::nullopt; - } - const auto [relative_position, brx_tracked_register] = *brx_info; - - const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); - if (!ldc_info) { - return std::nullopt; - } - const auto [buffer_info, ldc_tracked_register] = *ldc_info; - - const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); - if (!shl_tracked_register) { - return std::nullopt; - } - - const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); - if (!entries) { - return std::nullopt; - } - - return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; -} - -std::pair ParseCode(CFGRebuildState& state, u32 address) { - u32 offset = static_cast(address); - const u32 end_address = static_cast(state.program_code.size()); - ParseInfo parse_info{}; - SingleBranch single_branch{}; - - const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { - const auto pair = rebuild_state.labels.emplace(label_address); - if (pair.second) { - rebuild_state.inspect_queries.push_back(label_address); - } - }; - - while (true) { - if (offset >= end_address) { - // ASSERT_OR_EXECUTE can't be used, as it ignores the break - ASSERT_MSG(false, "Shader passed the current limit!"); - - single_branch.address = exit_branch; - single_branch.ignore = false; - break; - } - if (state.registered.contains(offset)) { - single_branch.address = offset; - single_branch.ignore = true; - break; - } - if (IsSchedInstruction(offset, state.start)) { - offset++; - continue; - } - const Instruction instr = {state.program_code[offset]}; - const auto opcode = OpCode::Decode(instr); - if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { - offset++; - continue; - } - - switch (opcode->get().GetId()) { - case OpCode::Id::EXIT: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRA: { - if (instr.bra.constant_buffer != 0) { - return {ParseResult::AbnormalFlow, parse_info}; - } - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - const u32 branch_offset = offset + instr.bra.GetBranchTarget(); - if (branch_offset == 0) { - single_branch.address = exit_branch; - } else { - single_branch.address = branch_offset; - } - insert_label(state, branch_offset); - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SYNC: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = true; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::BRK: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = unassigned_branch; - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = true; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::KIL: { - const auto pred_index = static_cast(instr.pred.pred_index); - single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); - if (single_branch.condition.predicate == Pred::NeverExecute) { - offset++; - continue; - } - const ConditionCode cc = instr.flow_condition_code; - single_branch.condition.cc = cc; - if (cc == ConditionCode::F) { - offset++; - continue; - } - single_branch.address = exit_branch; - single_branch.kill = true; - single_branch.is_sync = false; - single_branch.is_brk = false; - single_branch.ignore = false; - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, - single_branch.is_sync, single_branch.is_brk, single_branch.ignore); - - return {ParseResult::ControlCaught, parse_info}; - } - case OpCode::Id::SSY: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.ssy_labels.emplace(offset, target); - break; - } - case OpCode::Id::PBK: { - const u32 target = offset + instr.bra.GetBranchTarget(); - insert_label(state, target); - state.pbk_labels.emplace(offset, target); - break; - } - case OpCode::Id::BRX: { - const auto tmp = TrackBranchIndirectInfo(state, offset); - if (!tmp) { - LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); - return {ParseResult::AbnormalFlow, parse_info}; - } - - const auto result = *tmp; - const s32 pc_target = offset + result.relative_position; - std::vector branches; - for (u32 i = 0; i < result.entries; i++) { - auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); - if (!key) { - return {ParseResult::AbnormalFlow, parse_info}; - } - u32 value = *key; - u32 target = static_cast((value >> 3) + pc_target); - insert_label(state, target); - branches.emplace_back(value, target); - } - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo( - static_cast(instr.gpr8.Value()), std::move(branches)); - - return {ParseResult::ControlCaught, parse_info}; - } - default: - break; - } - - offset++; - } - single_branch.kill = false; - single_branch.is_sync = false; - single_branch.is_brk = false; - parse_info.end_address = offset - 1; - parse_info.branch_info = MakeBranchInfo( - single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, - single_branch.is_brk, single_branch.ignore); - return {ParseResult::BlockEnd, parse_info}; -} - -bool TryInspectAddress(CFGRebuildState& state) { - if (state.inspect_queries.empty()) { - return false; - } - - const u32 address = state.inspect_queries.front(); - state.inspect_queries.pop_front(); - const auto [result, block_index] = TryGetBlock(state, address); - switch (result) { - case BlockCollision::Found: { - return true; - } - case BlockCollision::Inside: { - // This case is the tricky one: - // We need to split the block into 2 separate blocks - const u32 end = state.block_info[block_index].end; - BlockInfo& new_block = CreateBlockInfo(state, address, end); - BlockInfo& current_block = state.block_info[block_index]; - current_block.end = address - 1; - new_block.branch = std::move(current_block.branch); - BlockBranchInfo forward_branch = MakeBranchInfo(); - const auto branch = std::get_if(forward_branch.get()); - branch->address = address; - branch->ignore = true; - current_block.branch = std::move(forward_branch); - return true; - } - default: - break; - } - const auto [parse_result, parse_info] = ParseCode(state, address); - if (parse_result == ParseResult::AbnormalFlow) { - // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction - return false; - } - - BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); - block_info.branch = parse_info.branch_info; - if (std::holds_alternative(*block_info.branch)) { - const auto branch = std::get_if(block_info.branch.get()); - if (branch->condition.IsUnconditional()) { - return true; - } - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); - return true; - } - return true; -} - -bool TryQuery(CFGRebuildState& state) { - const auto gather_labels = [](std::stack& cc, std::map& labels, - BlockInfo& block) { - auto gather_start = labels.lower_bound(block.start); - const auto gather_end = labels.upper_bound(block.end); - while (gather_start != gather_end) { - cc.push(gather_start->second); - ++gather_start; - } - }; - if (state.queries.empty()) { - return false; - } - - Query& q = state.queries.front(); - const u32 block_index = state.registered[q.address]; - BlockInfo& block = state.block_info[block_index]; - // If the block is visited, check if the stacks match, else gather the ssy/pbk - // labels into the current stack and look if the branch at the end of the block - // consumes a label. Schedule new queries accordingly - if (block.visited) { - BlockStack& stack = state.stacks[q.address]; - const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && - (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); - state.queries.pop_front(); - return all_okay; - } - block.visited = true; - state.stacks.insert_or_assign(q.address, BlockStack{q}); - - Query q2(q); - state.queries.pop_front(); - gather_labels(q2.ssy_stack, state.ssy_labels, block); - gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (std::holds_alternative(*block.branch)) { - auto* branch = std::get_if(block.branch.get()); - if (!branch->condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } - - auto& conditional_query = state.queries.emplace_back(q2); - if (branch->is_sync) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.ssy_stack.top(); - } - conditional_query.ssy_stack.pop(); - } - if (branch->is_brk) { - if (branch->address == unassigned_branch) { - branch->address = conditional_query.pbk_stack.top(); - } - conditional_query.pbk_stack.pop(); - } - conditional_query.address = branch->address; - return true; - } - - const auto* multi_branch = std::get_if(block.branch.get()); - for (const auto& branch_case : multi_branch->branches) { - auto& conditional_query = state.queries.emplace_back(q2); - conditional_query.address = branch_case.address; - } - - return true; -} - -void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { - const auto get_expr = [](const Condition& cond) -> Expr { - Expr result; - if (cond.cc != ConditionCode::T) { - result = MakeExpr(cond.cc); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast(cond.predicate); - bool negate = false; - if (pred > 7) { - negate = true; - pred -= 8; - } - Expr extra = MakeExpr(pred); - if (negate) { - extra = MakeExpr(std::move(extra)); - } - if (result) { - return MakeExpr(std::move(extra), std::move(result)); - } - return extra; - } - if (result) { - return result; - } - return MakeExpr(true); - }; - - if (std::holds_alternative(*branch_info)) { - const auto* branch = std::get_if(branch_info.get()); - if (branch->address < 0) { - if (branch->kill) { - mm.InsertReturn(get_expr(branch->condition), true); - return; - } - mm.InsertReturn(get_expr(branch->condition), false); - return; - } - mm.InsertGoto(get_expr(branch->condition), branch->address); - return; - } - const auto* multi_branch = std::get_if(branch_info.get()); - for (const auto& branch_case : multi_branch->branches) { - mm.InsertGoto(MakeExpr(multi_branch->gpr, branch_case.cmp_value), - branch_case.address); - } -} - -void DecompileShader(CFGRebuildState& state) { - state.manager->Init(); - for (auto label : state.labels) { - state.manager->DeclareLabel(label); - } - for (const auto& block : state.block_info) { - if (state.labels.contains(block.start)) { - state.manager->InsertLabel(block.start); - } - const bool ignore = BlockBranchIsIgnored(block.branch); - const u32 end = ignore ? block.end + 1 : block.end; - state.manager->InsertBlock(block.start, end); - if (!ignore) { - InsertBranch(*state.manager, block.branch); - } - } - state.manager->Decompile(); -} - -} // Anonymous namespace - -std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry) { - auto result_out = std::make_unique(); - if (settings.depth == CompileDepth::BruteForce) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - - CFGRebuildState state{program_code, start_address, registry}; - // Inspect Code and generate blocks - state.labels.clear(); - state.labels.emplace(start_address); - state.inspect_queries.push_back(state.start); - while (!state.inspect_queries.empty()) { - if (!TryInspectAddress(state)) { - result_out->settings.depth = CompileDepth::BruteForce; - return result_out; - } - } - - bool use_flow_stack = true; - - bool decompiled = false; - - if (settings.depth != CompileDepth::FlowStack) { - // Decompile Stacks - state.queries.push_back(Query{state.start, {}, {}}); - decompiled = true; - while (!state.queries.empty()) { - if (!TryQuery(state)) { - decompiled = false; - break; - } - } - } - - use_flow_stack = !decompiled; - - // Sort and organize results - std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); - if (decompiled && settings.depth != CompileDepth::NoFlowStack) { - ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, - settings.disable_else_derivation}; - state.manager = &manager; - DecompileShader(state); - decompiled = state.manager->IsFullyDecompiled(); - if (!decompiled) { - if (settings.depth == CompileDepth::FullDecompile) { - LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); - } else { - LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); - } - state.manager->ShowCurrentState("Of Shader"); - state.manager->Clear(); - } else { - auto characteristics = std::make_unique(); - characteristics->start = start_address; - characteristics->settings.depth = settings.depth; - characteristics->manager = std::move(manager); - characteristics->end = state.block_info.back().end + 1; - return characteristics; - } - } - - result_out->start = start_address; - result_out->settings.depth = - use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; - result_out->blocks.clear(); - for (auto& block : state.block_info) { - ShaderBlock new_block{}; - new_block.start = block.start; - new_block.end = block.end; - new_block.ignore_branch = BlockBranchIsIgnored(block.branch); - if (!new_block.ignore_branch) { - new_block.branch = block.branch; - } - result_out->end = std::max(result_out->end, block.end); - result_out->blocks.push_back(new_block); - } - if (!use_flow_stack) { - result_out->labels = std::move(state.labels); - return result_out; - } - - auto back = result_out->blocks.begin(); - auto next = std::next(back); - while (next != result_out->blocks.end()) { - if (!state.labels.contains(next->start) && next->start == back->end + 1) { - back->end = next->end; - next = result_out->blocks.erase(next); - continue; - } - back = next; - ++next; - } - - return result_out; -} -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h deleted file mode 100644 index 37bf96492..000000000 --- a/src/video_core/shader/control_flow.h +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include - -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -constexpr s32 exit_branch = -1; - -struct Condition { - Pred predicate{Pred::UnusedIndex}; - ConditionCode cc{ConditionCode::T}; - - bool IsUnconditional() const { - return predicate == Pred::UnusedIndex && cc == ConditionCode::T; - } - - bool operator==(const Condition& other) const { - return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); - } - - bool operator!=(const Condition& other) const { - return !operator==(other); - } -}; - -class SingleBranch { -public: - SingleBranch() = default; - explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, - bool is_brk_, bool ignore_) - : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, - ignore{ignore_} {} - - bool operator==(const SingleBranch& b) const { - return std::tie(condition, address, kill, is_sync, is_brk, ignore) == - std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); - } - - bool operator!=(const SingleBranch& b) const { - return !operator==(b); - } - - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; - -struct CaseBranch { - explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} - u32 cmp_value; - u32 address; -}; - -class MultiBranch { -public: - explicit MultiBranch(u32 gpr_, std::vector&& branches_) - : gpr{gpr_}, branches{std::move(branches_)} {} - - u32 gpr{}; - std::vector branches{}; -}; - -using BranchData = std::variant; -using BlockBranchInfo = std::shared_ptr; - -bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); - -struct ShaderBlock { - u32 start{}; - u32 end{}; - bool ignore_branch{}; - BlockBranchInfo branch{}; - - bool operator==(const ShaderBlock& sb) const { - return std::tie(start, end, ignore_branch) == - std::tie(sb.start, sb.end, sb.ignore_branch) && - BlockBranchInfoAreEqual(branch, sb.branch); - } - - bool operator!=(const ShaderBlock& sb) const { - return !operator==(sb); - } -}; - -struct ShaderCharacteristics { - std::list blocks{}; - std::set labels{}; - u32 start{}; - u32 end{}; - ASTManager manager{true, true}; - CompilerSettings settings{}; -}; - -std::unique_ptr ScanFlow(const ProgramCode& program_code, u32 start_address, - const CompilerSettings& settings, - Registry& registry); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp deleted file mode 100644 index 6576d1208..000000000 --- a/src/video_core/shader/decode.cpp +++ /dev/null @@ -1,368 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/control_flow.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { - -void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, - const std::list& used_samplers) { - if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { - return; - } - u32 count{}; - std::vector bound_offsets; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - ++count; - bound_offsets.emplace_back(sampler.offset); - } - if (count > 1) { - gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); - } -} - -std::optional TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, - VideoCore::GuestDriverProfile& gpu_driver, - const std::list& used_samplers) { - const u32 base_offset = sampler_to_deduce.offset; - u32 max_offset{std::numeric_limits::max()}; - for (const auto& sampler : used_samplers) { - if (sampler.is_bindless) { - continue; - } - if (sampler.offset > base_offset) { - max_offset = std::min(sampler.offset, max_offset); - } - } - if (max_offset == std::numeric_limits::max()) { - return std::nullopt; - } - return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); -} - -} // Anonymous namespace - -class ASTDecoder { -public: - explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} - - void operator()(ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfThen& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTIfElse& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTBlockEncoded& ast) {} - - void operator()(ASTBlockDecoded& ast) {} - - void operator()(ASTVarSet& ast) {} - - void operator()(ASTLabel& ast) {} - - void operator()(ASTGoto& ast) {} - - void operator()(ASTDoWhile& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(ASTReturn& ast) {} - - void operator()(ASTBreak& ast) {} - - void Visit(ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - if (node->IsBlockEncoded()) { - auto block = std::get_if(node->GetInnerData()); - NodeBlock bb = ir.DecodeRange(block->start, block->end); - node->TransformBlockEncoded(std::move(bb)); - } - } - -private: - ShaderIR& ir; -}; - -void ShaderIR::Decode() { - std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - - decompiled = false; - auto info = ScanFlow(program_code, main_offset, settings, registry); - auto& shader_info = *info; - coverage_begin = shader_info.start; - coverage_end = shader_info.end; - switch (shader_info.settings.depth) { - case CompileDepth::FlowStack: { - for (const auto& block : shader_info.blocks) { - basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); - } - break; - } - case CompileDepth::NoFlowStack: { - disable_flow_stack = true; - const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == static_cast(exit_branch)) { - return; - } - basic_blocks.insert({label, nodes}); - }; - const auto& blocks = shader_info.blocks; - NodeBlock current_block; - u32 current_label = static_cast(exit_branch); - for (const auto& block : blocks) { - if (shader_info.labels.contains(block.start)) { - insert_block(current_block, current_label); - current_block.clear(); - current_label = block.start; - } - if (!block.ignore_branch) { - DecodeRangeInner(current_block, block.start, block.end); - InsertControlFlow(current_block, block); - } else { - DecodeRangeInner(current_block, block.start, block.end + 1); - } - } - insert_block(current_block, current_label); - break; - } - case CompileDepth::DecompileBackwards: - case CompileDepth::FullDecompile: { - program_manager = std::move(shader_info.manager); - disable_flow_stack = true; - decompiled = true; - ASTDecoder decoder{*this}; - ASTNode program = GetASTProgram(); - decoder.Visit(program); - break; - } - default: - LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); - [[fallthrough]]; - case CompileDepth::BruteForce: { - const auto shader_end = static_cast(program_code.size()); - coverage_begin = main_offset; - coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; ++label) { - basic_blocks.insert({label, DecodeRange(label, label + 1)}); - } - break; - } - } - if (settings.depth != shader_info.settings.depth) { - LOG_WARNING( - HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", - CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); - } -} - -NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { - NodeBlock basic_block; - DecodeRangeInner(basic_block, begin, end); - return basic_block; -} - -void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { - for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { - pc = DecodeInstr(bb, pc); - } -} - -void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { - const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { - Node result = n; - if (cond.cc != ConditionCode::T) { - result = Conditional(GetConditionCode(cond.cc), {result}); - } - if (cond.predicate != Pred::UnusedIndex) { - u32 pred = static_cast(cond.predicate); - const bool is_neg = pred > 7; - if (is_neg) { - pred -= 8; - } - result = Conditional(GetPredicate(pred, is_neg), {result}); - } - return result; - }; - if (std::holds_alternative(*block.branch)) { - auto branch = std::get_if(block.branch.get()); - if (branch->address < 0) { - if (branch->kill) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - Node n = Operation(OperationCode::Branch, Immediate(branch->address)); - n = apply_conditions(branch->condition, n); - bb.push_back(n); - global_code.push_back(n); - return; - } - auto multi_branch = std::get_if(block.branch.get()); - Node op_a = GetRegister(multi_branch->gpr); - for (auto& branch_case : multi_branch->branches) { - Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); - Node op_b = Immediate(branch_case.cmp_value); - Node condition = - GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); - auto result = Conditional(condition, {n}); - bb.push_back(result); - global_code.push_back(result); - } -} - -u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { - // Ignore sched instructions when generating code. - if (IsSchedInstruction(pc, main_offset)) { - return pc + 1; - } - - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - const u32 nv_address = ConvertAddressToNvidiaSpace(pc); - - // Decoding failure - if (!opcode) { - UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); - bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", - nv_address, instr.value))); - return pc + 1; - } - - bb.push_back(Comment( - fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); - - using Tegra::Shader::Pred; - UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, - "NeverExecute predicate not implemented"); - - static const std::map decoders = { - {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, - {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, - {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, - {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, - {OpCode::Type::Shift, &ShaderIR::DecodeShift}, - {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, - {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, - {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, - {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, - {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, - {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, - {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, - {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, - {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, - {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, - {OpCode::Type::Image, &ShaderIR::DecodeImage}, - {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, - {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, - {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, - {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, - {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, - {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, - {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, - {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, - {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, - {OpCode::Type::Video, &ShaderIR::DecodeVideo}, - {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, - }; - - std::vector tmp_block; - if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { - pc = (this->*decoder->second)(tmp_block, pc); - } else { - pc = DecodeOther(tmp_block, pc); - } - - // Some instructions (like SSY) don't have a predicate field, they are always unconditionally - // executed. - const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); - const auto pred_index = static_cast(instr.pred.pred_index); - - if (can_be_predicated && pred_index != static_cast(Pred::UnusedIndex)) { - const Node conditional = - Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); - global_code.push_back(conditional); - bb.push_back(conditional); - } else { - for (auto& node : tmp_block) { - global_code.push_back(node); - bb.push_back(node); - } - } - - return pc + 1; -} - -void ShaderIR::PostDecode() { - // Deduce texture handler size if needed - auto gpu_driver = registry.AccessGuestDriverProfile(); - DeduceTextureHandlerSize(gpu_driver, used_samplers); - // Deduce Indexed Samplers - if (!uses_indexed_samplers) { - return; - } - for (auto& sampler : used_samplers) { - if (!sampler.is_indexed) { - continue; - } - if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { - sampler.size = *size; - } else { - LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); - sampler.size = 1; - } - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp deleted file mode 100644 index 15eb700e7..000000000 --- a/src/video_core/shader/decode/arithmetic.cpp +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::SubOp; - -u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - - Node op_b = [&] { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV_C: - case OpCode::Id::MOV_R: { - // MOV does not have neither 'abs' nor 'neg' bits. - SetRegister(bb, instr.gpr0, op_b); - break; - } - case OpCode::Id::FMUL_C: - case OpCode::Id::FMUL_R: - case OpCode::Id::FMUL_IMM: { - // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. - if (instr.fmul.tab5cb8_2 != 0) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); - } - if (instr.fmul.tab5c68_0 != 1) { - LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); - } - - op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); - - static constexpr std::array FmulPostFactor = { - 1.000f, // None - 0.500f, // Divide 2 - 0.250f, // Divide 4 - 0.125f, // Divide 8 - 8.000f, // Mul 8 - 4.000f, // Mul 4 - 2.000f, // Mul 2 - }; - - if (instr.fmul.postfactor != 0) { - op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, - Immediate(FmulPostFactor[instr.fmul.postfactor])); - } - - // TODO(Rodrigo): Should precise be used when there's a postfactor? - Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); - - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD_C: - case OpCode::Id::FADD_R: - case OpCode::Id::FADD_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::MUFU: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - - Node value = [&]() { - switch (instr.sub_op) { - case SubOp::Cos: - return Operation(OperationCode::FCos, PRECISE, op_a); - case SubOp::Sin: - return Operation(OperationCode::FSin, PRECISE, op_a); - case SubOp::Ex2: - return Operation(OperationCode::FExp2, PRECISE, op_a); - case SubOp::Lg2: - return Operation(OperationCode::FLog2, PRECISE, op_a); - case SubOp::Rcp: - return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); - case SubOp::Rsq: - return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); - case SubOp::Sqrt: - return Operation(OperationCode::FSqrt, PRECISE, op_a); - default: - UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); - return Immediate(0); - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FMNMX_C: - case OpCode::Id::FMNMX_R: - case OpCode::Id::FMNMX_IMM: { - op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - - const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); - - const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); - const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FCMP_RR: - case OpCode::Id::FCMP_RC: - case OpCode::Id::FCMP_IMMR: { - UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); - Node op_c = GetRegister(instr.gpr39); - Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); - break; - } - case OpCode::Id::RRO_C: - case OpCode::Id::RRO_R: - case OpCode::Id::RRO_IMM: { - LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); - - // Currently RRO is only implemented as a register move. - op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); - SetRegister(bb, instr.gpr0, op_b); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp deleted file mode 100644 index 88103fede..000000000 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - bool negate_a = false; - bool negate_b = false; - bool absolute_a = false; - bool absolute_b = false; - - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_R: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HADD2_C: - if (instr.alu_half.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - negate_a = ((instr.value >> 43) & 1) != 0; - negate_b = ((instr.value >> 56) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - case OpCode::Id::HMUL2_R: - negate_a = ((instr.value >> 43) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 30) & 1) != 0; - break; - case OpCode::Id::HMUL2_C: - negate_b = ((instr.value >> 31) & 1) != 0; - absolute_a = ((instr.value >> 44) & 1) != 0; - absolute_b = ((instr.value >> 54) & 1) != 0; - break; - default: - UNREACHABLE(); - break; - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); - op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); - - auto [type_b, op_b] = [this, instr, opcode]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HMUL2_C: - return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HADD2_R: - case OpCode::Id::HMUL2_R: - return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; - default: - UNREACHABLE(); - return {HalfType::F32, Immediate(0)}; - } - }(); - op_b = UnpackHalfFloat(op_b, type_b); - op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); - - Node value = [this, opcode, op_a, op_b = op_b] { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_C: - case OpCode::Id::HADD2_R: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_C: - case OpCode::Id::HMUL2_R: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); - return Immediate(0); - } - }(); - value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp deleted file mode 100644 index d179b9873..000000000 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - if (instr.alu_half_imm.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } else { - if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); - - const Node op_b = UnpackHalfImmediate(instr, true); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::HADD2_IMM: - return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); - case OpCode::Id::HMUL2_IMM: - return Operation(OperationCode::HMul, PRECISE, op_a, op_b); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); - SetRegister(bb, instr.gpr0, value); - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp deleted file mode 100644 index f1875967c..000000000 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::MOV32_IMM: { - SetRegister(bb, instr.gpr0, GetImmediate32(instr)); - break; - } - case OpCode::Id::FMUL32_IMM: { - Node value = - Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); - value = GetSaturatedFloat(value, instr.fmul32.saturate); - - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FADD32I: { - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, - instr.fadd32i.negate_a); - const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, - instr.fadd32i.negate_b); - - const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); - SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp deleted file mode 100644 index 7b5bb7003..000000000 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ /dev/null @@ -1,375 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::IAdd3Height; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD_C: - case OpCode::Id::IADD_R: - case OpCode::Id::IADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); - UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - Node value = Operation(OperationCode::UAdd, op_a, op_b); - - if (instr.iadd.x) { - Node carry = GetInternalFlag(InternalFlag::Carry); - Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); - value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); - } - - if (instr.generates_cc) { - const Node i0 = Immediate(0); - - Node zero = Operation(OperationCode::LogicalIEqual, value, i0); - Node sign = Operation(OperationCode::LogicalILessThan, value, i0); - Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); - - Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); - Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); - Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); - Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); - - SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); - SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); - SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); - SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::IADD3_C: - case OpCode::Id::IADD3_R: - case OpCode::Id::IADD3_IMM: { - Node op_c = GetRegister(instr.gpr39); - - const auto ApplyHeight = [&](IAdd3Height height, Node value) { - switch (height) { - case IAdd3Height::None: - return value; - case IAdd3Height::LowerHalfWord: - return BitfieldExtract(value, 0, 16); - case IAdd3Height::UpperHalfWord: - return BitfieldExtract(value, 16, 16); - default: - UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); - return Immediate(0); - } - }; - - if (opcode->get().GetId() == OpCode::Id::IADD3_R) { - op_a = ApplyHeight(instr.iadd3.height_a, op_a); - op_b = ApplyHeight(instr.iadd3.height_b, op_b); - op_c = ApplyHeight(instr.iadd3.height_c, op_c); - } - - op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); - op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); - - const Node value = [&] { - Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); - if (opcode->get().GetId() != OpCode::Id::IADD3_R) { - return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); - } - const Node shifted = [&] { - switch (instr.iadd3.mode) { - case Tegra::Shader::IAdd3Mode::RightShift: - // TODO(tech4me): According to - // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 - // The addition between op_a and op_b should be done in uint33, more - // investigation required - return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, - Immediate(16)); - case Tegra::Shader::IAdd3Mode::LeftShift: - return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, - Immediate(16)); - default: - return add_ab; - } - }(); - return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); - }(); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ISCADD_C: - case OpCode::Id::ISCADD_R: - case OpCode::Id::ISCADD_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in ISCADD is not implemented"); - - op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); - op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); - - const Node shift = Immediate(static_cast(instr.alu_integer.shift_amount)); - const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); - const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::POPC_C: - case OpCode::Id::POPC_R: - case OpCode::Id::POPC_IMM: { - if (instr.popc.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - } - const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::FLO_R: - case OpCode::Id::FLO_C: - case OpCode::Id::FLO_IMM: { - Node value; - if (instr.flo.invert) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.is_signed) { - value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); - } else { - value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); - } - if (instr.flo.sh) { - value = - Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); - } - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::SEL_C: - case OpCode::Id::SEL_R: - case OpCode::Id::SEL_IMM: { - const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); - const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::ICMP_CR: - case OpCode::Id::ICMP_R: - case OpCode::Id::ICMP_RC: - case OpCode::Id::ICMP_IMM: { - const Node zero = Immediate(0); - - const auto [op_rhs, test] = [&]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::ICMP_CR: - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_R: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::ICMP_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::ICMP_IMM: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {zero, zero}; - } - }(); - const Node op_lhs = GetRegister(instr.gpr8); - const Node comparison = - GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); - SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); - break; - } - case OpCode::Id::LOP_C: - case OpCode::Id::LOP_R: - case OpCode::Id::LOP_IMM: { - if (instr.alu.lop.invert_a) - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); - if (instr.alu.lop.invert_b) - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, - instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, - instr.generates_cc); - break; - } - case OpCode::Id::LOP3_C: - case OpCode::Id::LOP3_R: - case OpCode::Id::LOP3_IMM: { - const Node op_c = GetRegister(instr.gpr39); - const Node lut = [&]() { - if (opcode->get().GetId() == OpCode::Id::LOP3_R) { - return Immediate(instr.alu.lop3.GetImmLut28()); - } else { - return Immediate(instr.alu.lop3.GetImmLut48()); - } - }(); - - WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); - break; - } - case OpCode::Id::IMNMX_C: - case OpCode::Id::IMNMX_R: - case OpCode::Id::IMNMX_IMM: { - UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); - - const bool is_signed = instr.imnmx.is_signed; - - const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); - const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); - const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); - const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::LEA_R2: - case OpCode::Id::LEA_R1: - case OpCode::Id::LEA_IMM: - case OpCode::Id::LEA_RZ: - case OpCode::Id::LEA_HI: { - auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::LEA_R2: { - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), - Immediate(static_cast(instr.lea.r2.entry_a))}; - } - case OpCode::Id::LEA_R1: { - const bool neg = instr.lea.r1.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - GetRegister(instr.gpr20), - Immediate(static_cast(instr.lea.r1.entry_a))}; - } - case OpCode::Id::LEA_IMM: { - const bool neg = instr.lea.imm.neg != 0; - return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast(instr.lea.imm.entry_a)), - Immediate(static_cast(instr.lea.imm.entry_b))}; - } - case OpCode::Id::LEA_RZ: { - const bool neg = instr.lea.rz.neg != 0; - return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), - GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), - Immediate(static_cast(instr.lea.rz.entry_a))}; - } - case OpCode::Id::LEA_HI: - default: - UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); - - return {Immediate(static_cast(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), - Immediate(static_cast(instr.lea.imm.entry_b))}; - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast(Pred::UnusedIndex), - "Unhandled LEA Predicate"); - - Node value = - Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); - value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); - SetRegister(bb, instr.gpr0, std::move(value)); - - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, - Node imm_lut, bool sets_cc) { - const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { - Node value = Immediate(0); - const ImmediateNode imm = std::get(*ttbl); - if (imm.GetValue() & 0x01) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x02) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x04) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x08) { - const Node a = Operation(OperationCode::IBitwiseNot, na); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x10) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x20) { - const Node b = Operation(OperationCode::IBitwiseNot, nb); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x40) { - const Node c = Operation(OperationCode::IBitwiseNot, nc); - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - if (imm.GetValue() & 0x80) { - Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); - r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); - value = Operation(OperationCode::IBitwiseOr, value, r); - } - return value; - }(op_a, op_b, op_c, imm_lut); - - SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); - SetRegister(bb, dest, lop3_fast); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp deleted file mode 100644 index 73580277a..000000000 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::LogicOperation; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredicateResultMode; -using Tegra::Shader::Register; - -u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = Immediate(static_cast(instr.alu.imm20_32)); - - switch (opcode->get().GetId()) { - case OpCode::Id::IADD32I: { - UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); - - op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); - - Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); - - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::LOP32I: { - if (instr.alu.lop32i.invert_a) { - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); - } - - if (instr.alu.lop32i.invert_b) { - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); - } - - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), - std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, - instr.op_32.generates_cc != 0); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", - opcode->get().GetName()); - } - - return pc; -} - -void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, - Node op_b, PredicateResultMode predicate_mode, Pred predicate, - bool sets_cc) { - Node result = [&] { - switch (logic_op) { - case LogicOperation::And: - return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Or: - return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::Xor: - return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); - case LogicOperation::PassB: - return op_b; - default: - UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); - return Immediate(0); - } - }(); - - SetInternalFlagsFromInteger(bb, result, sets_cc); - SetRegister(bb, dest, result); - - // Write the predicate value depending on the predicate mode. - switch (predicate_mode) { - case PredicateResultMode::None: - // Do nothing. - return; - case PredicateResultMode::NotZero: { - // Set the predicate to true if the result is not zero. - Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); - SetPredicate(bb, static_cast(predicate), std::move(compare)); - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp deleted file mode 100644 index 8e3b46e8e..000000000 --- a/src/video_core/shader/decode/bfe.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::BFE_R: - return GetRegister(instr.gpr20); - case OpCode::Id::BFE_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::BFE_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); - - const bool is_signed = instr.bfe.is_signed; - - // using reverse parallel method in - // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel - // note for later if possible to implement faster method. - if (instr.bfe.brev) { - const auto swap = [&](u32 s, u32 mask) { - Node v1 = - SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); - if (mask != 0) { - v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), - Immediate(mask)); - } - Node v2 = op_a; - if (mask != 0) { - v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), - Immediate(mask)); - } - v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), - Immediate(s)); - return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), - std::move(v2)); - }; - op_a = swap(1, 0x55555555U); - op_a = swap(2, 0x33333333U); - op_a = swap(4, 0x0F0F0F0FU); - op_a = swap(8, 0x00FF00FFU); - op_a = swap(16, 0); - } - - const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(0), Immediate(8)); - const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, - Immediate(8), Immediate(8)); - auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); - SetRegister(bb, instr.gpr0, std::move(result)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp deleted file mode 100644 index 70d1c055b..000000000 --- a/src/video_core/shader/decode/bfi.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto [packed_shift, base] = [&]() -> std::pair { - switch (opcode->get().GetId()) { - case OpCode::Id::BFI_RC: - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::BFI_IMM_R: - return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; - default: - UNREACHABLE(); - return {Immediate(0), Immediate(0)}; - } - }(); - const Node insert = GetRegister(instr.gpr8); - const Node offset = BitfieldExtract(packed_shift, 0, 8); - const Node bits = BitfieldExtract(packed_shift, 8, 8); - - const Node value = - Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp deleted file mode 100644 index fea7a54df..000000000 --- a/src/video_core/shader/decode/conversion.cpp +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; - -namespace { - -constexpr OperationCode GetFloatSelector(u64 selector) { - return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; -} - -constexpr u32 SizeInBits(Register::Size size) { - switch (size) { - case Register::Size::Byte: - return 8; - case Register::Size::Short: - return 16; - case Register::Size::Word: - return 32; - case Register::Size::Long: - return 64; - } - return 0; -} - -constexpr std::optional> IntegerSaturateBounds(Register::Size src_size, - Register::Size dst_size, - bool src_signed, - bool dst_signed) { - const u32 dst_bits = SizeInBits(dst_size); - if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { - if (src_signed == dst_signed) { - return std::nullopt; - } - return std::make_pair(0, std::numeric_limits::max()); - } - if (dst_signed) { - // Signed destination, clamp to [-128, 127] for instance - return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); - } else { - // Unsigned destination - if (dst_bits == 32) { - // Avoid shifting by 32, that is undefined behavior - return std::make_pair(0, s32(std::numeric_limits::max())); - } - return std::make_pair(0, (1 << dst_bits) - 1); - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - case OpCode::Id::I2I_C: - case OpCode::Id::I2I_IMM: { - const bool src_signed = instr.conversion.is_input_signed; - const bool dst_signed = instr.conversion.is_output_signed; - const Register::Size src_size = instr.conversion.src_size; - const Register::Size dst_size = instr.conversion.dst_size; - const u32 selector = static_cast(instr.conversion.int_src.selector); - - Node value = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2I_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - // Ensure the source selector is valid - switch (instr.conversion.src_size) { - case Register::Size::Byte: - break; - case Register::Size::Short: - ASSERT(selector == 0 || selector == 2); - break; - default: - ASSERT(selector == 0); - break; - } - - if (src_size != Register::Size::Word || selector != 0) { - value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), - Immediate(selector * 8), Immediate(SizeInBits(src_size))); - } - - value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, - instr.conversion.negate_a, src_signed); - - if (instr.alu.saturate_d) { - if (src_signed && !dst_signed) { - Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, - Immediate(1 << (SizeInBits(src_size) - 1))); - value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), - std::move(value)); - - // Simplify generated expressions, this can be removed without semantic impact - SetTemporary(bb, 0, std::move(value)); - value = GetTemporary(0); - - if (dst_size != Register::Size::Word) { - const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); - Node is_large = - Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); - value = Operation(OperationCode::Select, std::move(is_large), limit, - std::move(value)); - } - } else if (const std::optional bounds = - IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { - value = SignedOperation(OperationCode::IMax, src_signed, std::move(value), - Immediate(bounds->first)); - value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), - Immediate(bounds->second)); - } - } else if (dst_size != Register::Size::Word) { - // No saturation, we only have to mask the result - Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); - value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(value)); - break; - } - case OpCode::Id::I2F_R: - case OpCode::Id::I2F_C: - case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in I2F is not implemented"); - - Node value = [&] { - switch (opcode->get().GetId()) { - case OpCode::Id::I2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::I2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::I2F_IMM: - return Immediate(instr.alu.GetSignedImm20_20()); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const bool input_signed = instr.conversion.is_input_signed; - - if (const u32 offset = static_cast(instr.conversion.int_src.selector); offset > 0) { - ASSERT(instr.conversion.src_size == Register::Size::Byte || - instr.conversion.src_size == Register::Size::Short); - if (instr.conversion.src_size == Register::Size::Short) { - ASSERT(offset == 0 || offset == 2); - } - value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, - std::move(value), Immediate(offset * 8)); - } - - value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); - value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); - value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); - value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2F_R: - case OpCode::Id::F2F_C: - case OpCode::Id::F2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2F is not implemented"); - - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2F_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2F_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2F_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&] { - if (instr.conversion.src_size != instr.conversion.dst_size) { - // Rounding operations only matter when the source and destination conversion size - // is the same. - return value; - } - switch (instr.conversion.f2f.GetRoundingMode()) { - case Tegra::Shader::F2fRoundingOp::None: - return value; - case Tegra::Shader::F2fRoundingOp::Round: - return Operation(OperationCode::FRoundEven, value); - case Tegra::Shader::F2fRoundingOp::Floor: - return Operation(OperationCode::FFloor, value); - case Tegra::Shader::F2fRoundingOp::Ceil: - return Operation(OperationCode::FCeil, value); - case Tegra::Shader::F2fRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", - instr.conversion.f2f.rounding.Value()); - return value; - } - }(); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - - if (instr.conversion.dst_size == Register::Size::Short) { - value = Operation(OperationCode::HCastFloat, PRECISE, value); - } - - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::F2I_R: - case OpCode::Id::F2I_C: - case OpCode::Id::F2I_IMM: { - UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in F2I is not implemented"); - Node value = [&]() { - switch (opcode->get().GetId()) { - case OpCode::Id::F2I_R: - return GetRegister(instr.gpr20); - case OpCode::Id::F2I_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::F2I_IMM: - return GetImmediate19(instr); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - if (instr.conversion.src_size == Register::Size::Short) { - value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, - std::move(value)); - } else { - ASSERT(instr.conversion.float_src.selector == 0); - } - - value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - - value = [&]() { - switch (instr.conversion.f2i.rounding) { - case Tegra::Shader::F2iRoundingOp::RoundEven: - return Operation(OperationCode::FRoundEven, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Floor: - return Operation(OperationCode::FFloor, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Ceil: - return Operation(OperationCode::FCeil, PRECISE, value); - case Tegra::Shader::F2iRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, PRECISE, value); - default: - UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", - instr.conversion.f2i.rounding.Value()); - return Immediate(0); - } - }(); - const bool is_signed = instr.conversion.is_output_signed; - value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); - value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); - - SetRegister(bb, instr.gpr0, value); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp deleted file mode 100644 index 5973588d6..000000000 --- a/src/video_core/shader/decode/ffma.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); - if (instr.ffma.tab5980_0 != 1) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); - } - if (instr.ffma.tab5980_1 != 0) { - LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); - } - - const Node op_a = GetRegister(instr.gpr8); - - auto [op_b, op_c] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::FFMA_CR: { - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - } - case OpCode::Id::FFMA_RR: - return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::FFMA_RC: { - return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - } - case OpCode::Id::FFMA_IMM: - return {GetImmediate19(instr), GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); - return {Immediate(0), Immediate(0)}; - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); - op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); - - Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedFloat(value, instr.alu.saturate_d); - - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp deleted file mode 100644 index 5614e8a0d..000000000 --- a/src/video_core/shader/decode/float_set.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, - instr.fset.neg_a != 0); - - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); - - // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the - // condition is true, and to 0 otherwise. - const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fset.op); - const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.fset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp deleted file mode 100644 index 200c2c983..000000000 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, - instr.fsetp.neg_a != 0); - Node op_b = [&]() { - if (instr.is_b_imm) { - return GetImmediate19(instr); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); - - // We can't use the constant predicate as destination. - ASSERT(instr.fsetp.pred3 != static_cast(Pred::UnusedIndex)); - - const Node predicate = - GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); - const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); - const Node value = Operation(combiner, predicate, second_pred); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.fsetp.pred3, value); - - if (instr.fsetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - const Node second_value = Operation(combiner, negated_pred, second_pred); - SetPredicate(bb, instr.fsetp.pred0, second_value); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp deleted file mode 100644 index fa83108cd..000000000 --- a/src/video_core/shader/decode/half_set.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - PredCondition cond{}; - bool bf = false; - bool ftz = false; - bool neg_a = false; - bool abs_a = false; - bool neg_b = false; - bool abs_b = false; - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - case OpCode::Id::HSET2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - bf = instr.Bit(53); - ftz = instr.Bit(54); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(56); - abs_b = instr.Bit(54); - break; - case OpCode::Id::HSET2_R: - cond = instr.hsetp2.reg.cond; - bf = instr.Bit(49); - ftz = instr.Bit(50); - neg_a = instr.Bit(43); - abs_a = instr.Bit(44); - neg_b = instr.Bit(31); - abs_b = instr.Bit(30); - break; - default: - UNREACHABLE(); - } - - Node op_b = [this, instr, opcode] { - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_C: - // Inform as unimplemented as this is not tested. - UNIMPLEMENTED_MSG("HSET2_C is not implemented"); - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - case OpCode::Id::HSET2_R: - return GetRegister(instr.gpr20); - case OpCode::Id::HSET2_IMM: - return UnpackHalfImmediate(instr, true); - default: - UNREACHABLE(); - return Node{}; - } - }(); - - if (!ftz) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); - op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); - - switch (opcode->get().GetId()) { - case OpCode::Id::HSET2_R: - op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); - [[fallthrough]]; - case OpCode::Id::HSET2_C: - op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); - break; - default: - break; - } - - Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); - - Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); - - // HSET2 operates on each half float in the pack. - std::array values; - for (u32 i = 0; i < 2; ++i) { - const u32 raw_value = bf ? 0x3c00 : 0xffff; - Node true_value = Immediate(raw_value << (i * 16)); - Node false_value = Immediate(0); - - Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); - Node predicate = Operation(combiner, comparison, second_pred); - values[i] = - Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); - } - - Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); - SetRegister(bb, instr.gpr0, move(value)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp deleted file mode 100644 index 310655619..000000000 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (instr.hsetp2.ftz != 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); - } - - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); - - Tegra::Shader::PredCondition cond{}; - bool h_and{}; - Node op_b{}; - switch (opcode->get().GetId()) { - case OpCode::Id::HSETP2_C: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); - // F32 is hardcoded in hardware - op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); - break; - case OpCode::Id::HSETP2_IMM: - cond = instr.hsetp2.cbuf_and_imm.cond; - h_and = instr.hsetp2.cbuf_and_imm.h_and; - op_b = UnpackHalfImmediate(instr, true); - break; - case OpCode::Id::HSETP2_R: - cond = instr.hsetp2.reg.cond; - h_and = instr.hsetp2.reg.h_and; - op_b = - GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), - instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); - break; - default: - UNREACHABLE(); - op_b = Immediate(0); - } - - const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); - const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); - - const auto Write = [&](u64 dest, Node src) { - SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); - }; - - const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); - const u64 first = instr.hsetp2.pred3; - const u64 second = instr.hsetp2.pred0; - if (h_and) { - Node joined = Operation(OperationCode::LogicalAnd2, comparison); - Write(first, joined); - Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); - } else { - Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); - Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp deleted file mode 100644 index 5b44cb79c..000000000 --- a/src/video_core/shader/decode/hfma2.cpp +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::HalfPrecision; -using Tegra::Shader::HalfType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { - DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); - } else { - DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); - } - - constexpr auto identity = HalfType::H0_H1; - bool neg_b{}, neg_c{}; - auto [saturate, type_b, op_b, type_c, - op_c] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::HFMA2_CR: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, HalfType::F32, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_RC: - neg_b = instr.hfma2.negate_b; - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), - HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::HFMA2_RR: - neg_b = instr.hfma2.rr.negate_b; - neg_c = instr.hfma2.rr.negate_c; - return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), - instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; - case OpCode::Id::HFMA2_IMM_R: - neg_c = instr.hfma2.negate_c; - return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), - instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; - default: - return {false, identity, Immediate(0), identity, Immediate(0)}; - } - }(); - - const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); - op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); - op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); - - Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); - value = GetSaturatedHalfFloat(value, saturate); - value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp deleted file mode 100644 index 5470e8cf4..000000000 --- a/src/video_core/shader/decode/image.cpp +++ /dev/null @@ -1,536 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/textures/texture.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; -using Tegra::Shader::StoreType; -using Tegra::Texture::ComponentType; -using Tegra::Texture::TextureFormat; -using Tegra::Texture::TICEntry; - -namespace { - -ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, - std::size_t component) { - const TextureFormat format{descriptor.format}; - switch (format) { - case TextureFormat::R16G16B16A16: - case TextureFormat::R32G32B32A32: - case TextureFormat::R32G32B32: - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.b_type; - } - if (component == 3) { - return descriptor.a_type; - } - break; - case TextureFormat::A8R8G8B8: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.r_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.b_type; - } - break; - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - if (component == 0) { - return descriptor.a_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - if (component == 3) { - return descriptor.r_type; - } - break; - case TextureFormat::R32_B24G8: - if (component == 0) { - return descriptor.r_type; - } - if (component == 1) { - return descriptor.b_type; - } - if (component == 2) { - return descriptor.g_type; - } - break; - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - if (component == 0) { - return descriptor.b_type; - } - if (component == 1) { - return descriptor.g_type; - } - if (component == 2) { - return descriptor.r_type; - } - break; - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - if (component == 0) { - return descriptor.g_type; - } - if (component == 1) { - return descriptor.r_type; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return ComponentType::FLOAT; -} - -bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array mask = { - 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), - (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask)}.test(component); -} - -u32 GetComponentSize(TextureFormat format, std::size_t component) { - switch (format) { - case TextureFormat::R32G32B32A32: - return 32; - case TextureFormat::R16G16B16A16: - return 16; - case TextureFormat::R32G32B32: - return component <= 2 ? 32 : 0; - case TextureFormat::R32G32: - return component <= 1 ? 32 : 0; - case TextureFormat::R16G16: - return component <= 1 ? 16 : 0; - case TextureFormat::R32: - return component == 0 ? 32 : 0; - case TextureFormat::R16: - return component == 0 ? 16 : 0; - case TextureFormat::R8: - return component == 0 ? 8 : 0; - case TextureFormat::R1: - return component == 0 ? 1 : 0; - case TextureFormat::A8R8G8B8: - return 8; - case TextureFormat::A2B10G10R10: - return (component == 3 || component == 2 || component == 1) ? 10 : 2; - case TextureFormat::A4B4G4R4: - return 4; - case TextureFormat::A5B5G5R1: - return (component == 0 || component == 1 || component == 2) ? 5 : 1; - case TextureFormat::A1B5G5R5: - return (component == 1 || component == 2 || component == 3) ? 5 : 1; - case TextureFormat::R32_B24G8: - if (component == 0) { - return 32; - } - if (component == 1) { - return 24; - } - if (component == 2) { - return 8; - } - return 0; - case TextureFormat::B5G6R5: - if (component == 0 || component == 2) { - return 5; - } - if (component == 1) { - return 6; - } - return 0; - case TextureFormat::B6G5R5: - if (component == 1 || component == 2) { - return 5; - } - if (component == 0) { - return 6; - } - return 0; - case TextureFormat::B10G11R11: - if (component == 1 || component == 2) { - return 11; - } - if (component == 0) { - return 10; - } - return 0; - case TextureFormat::R24G8: - if (component == 0) { - return 8; - } - if (component == 1) { - return 24; - } - return 0; - case TextureFormat::R8G24: - if (component == 0) { - return 24; - } - if (component == 1) { - return 8; - } - return 0; - case TextureFormat::R8G8: - return (component == 0 || component == 1) ? 8 : 0; - case TextureFormat::G4R4: - return (component == 0 || component == 1) ? 4 : 0; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return 0; - } -} - -std::size_t GetImageComponentMask(TextureFormat format) { - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - switch (format) { - case TextureFormat::R32G32B32A32: - case TextureFormat::R16G16B16A16: - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A4B4G4R4: - case TextureFormat::A5B5G5R1: - case TextureFormat::A1B5G5R5: - return std::size_t{R | G | B | A}; - case TextureFormat::R32G32B32: - case TextureFormat::R32_B24G8: - case TextureFormat::B5G6R5: - case TextureFormat::B6G5R5: - case TextureFormat::B10G11R11: - return std::size_t{R | G | B}; - case TextureFormat::R32G32: - case TextureFormat::R16G16: - case TextureFormat::R24G8: - case TextureFormat::R8G24: - case TextureFormat::R8G8: - case TextureFormat::G4R4: - return std::size_t{R | G}; - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R8: - case TextureFormat::R1: - return std::size_t{R}; - default: - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return std::size_t{R | G | B | A}; - } -} - -std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - case Tegra::Shader::ImageType::TextureBuffer: - return 1; - case Tegra::Shader::ImageType::Texture1DArray: - case Tegra::Shader::ImageType::Texture2D: - return 2; - case Tegra::Shader::ImageType::Texture2DArray: - case Tegra::Shader::ImageType::Texture3D: - return 3; - } - UNREACHABLE(); - return 1; -} -} // Anonymous namespace - -std::pair ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, - Node original_value) { - switch (component_type) { - case ComponentType::SNORM: { - // range [-1.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast(1 << component_size) / 2.f - 1.f)); - cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); - return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; - } - case ComponentType::SINT: - case ComponentType::UNORM: { - bool is_signed = component_type == ComponentType::SINT; - // range [0.0, 1.0] - auto cnv_value = Operation(OperationCode::FMul, original_value, - Immediate(static_cast(1 << component_size) - 1.f)); - return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), - is_signed}; - } - case ComponentType::UINT: // range [0, (1 << component_size) - 1] - return {std::move(original_value), false}; - case ComponentType::FLOAT: - if (component_size == 16) { - return {Operation(OperationCode::HCastFloat, original_value), true}; - } else { - return {std::move(original_value), true}; - } - default: - UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); - return {std::move(original_value), true}; - } -} - -u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { - std::vector coords; - const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; - coords.reserve(num_coords); - for (std::size_t i = 0; i < num_coords; ++i) { - coords.push_back(GetRegister(instr.gpr8.Value() + i)); - } - return coords; - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::SULD: { - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkRead(); - - if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.suldst.IsComponentEnabled(element)) { - continue; - } - MetaImage meta{image, {}, element}; - Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { - UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && - instr.suldst.GetStoreDataLayout() != StoreType::Bits64); - - auto descriptor = [this, instr] { - std::optional sampler_descriptor; - if (instr.suldst.is_immediate) { - sampler_descriptor = - registry.ObtainBoundSampler(static_cast(instr.image.index.Value())); - } else { - const Node image_register = GetRegister(instr.gpr39); - const auto result = TrackCbuf(image_register, global_code, - static_cast(global_code.size())); - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); - } - if (!sampler_descriptor) { - UNREACHABLE_MSG("Failed to obtain image descriptor"); - } - return *sampler_descriptor; - }(); - - const auto comp_mask = GetImageComponentMask(descriptor.format); - - switch (instr.suldst.GetStoreDataLayout()) { - case StoreType::Bits32: - case StoreType::Bits64: { - u32 indexer = 0; - u32 shifted_counter = 0; - Node value = Immediate(0); - for (u32 element = 0; element < 4; ++element) { - if (!IsComponentEnabled(comp_mask, element)) { - continue; - } - const auto component_type = GetComponentType(descriptor, element); - const auto component_size = GetComponentSize(descriptor.format, element); - MetaImage meta{image, {}, element}; - - auto [converted_value, is_signed] = GetComponentValue( - component_type, component_size, - Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); - - // shift element to correct position - const auto shifted = shifted_counter; - if (shifted > 0) { - converted_value = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, - std::move(converted_value), Immediate(shifted)); - } - shifted_counter += component_size; - - // add value into result - value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); - - // if we shifted enough for 1 byte -> we save it into temp - if (shifted_counter >= 32) { - SetTemporary(bb, indexer++, std::move(value)); - // reset counter and value to prepare pack next byte - value = Immediate(0); - shifted_counter = 0; - } - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNREACHABLE(); - break; - } - } - break; - } - case OpCode::Id::SUST: { - UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != - Tegra::Shader::OutOfBoundsStore::Ignore); - UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA - - std::vector values; - constexpr std::size_t hardcoded_size{4}; - for (std::size_t i = 0; i < hardcoded_size; ++i) { - values.push_back(GetRegister(instr.gpr0.Value() + i)); - } - - const auto type{instr.suldst.image_type}; - auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; - image.MarkWrite(); - - MetaImage meta{image, std::move(values)}; - bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); - break; - } - case OpCode::Id::SUATOM: { - UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); - - const OperationCode operation_code = [instr] { - switch (instr.suatom_d.operation_type) { - case Tegra::Shader::ImageAtomicOperationType::S32: - case Tegra::Shader::ImageAtomicOperationType::U32: - switch (instr.suatom_d.operation) { - case Tegra::Shader::ImageAtomicOperation::Add: - return OperationCode::AtomicImageAdd; - case Tegra::Shader::ImageAtomicOperation::And: - return OperationCode::AtomicImageAnd; - case Tegra::Shader::ImageAtomicOperation::Or: - return OperationCode::AtomicImageOr; - case Tegra::Shader::ImageAtomicOperation::Xor: - return OperationCode::AtomicImageXor; - case Tegra::Shader::ImageAtomicOperation::Exch: - return OperationCode::AtomicImageExchange; - default: - break; - } - break; - default: - break; - } - UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", - static_cast(instr.suatom_d.operation.Value()), - static_cast(instr.suatom_d.operation_type.Value())); - return OperationCode::AtomicImageAdd; - }(); - - Node value = GetRegister(instr.gpr0); - - const auto type = instr.suatom_d.image_type; - auto& image = GetImage(instr.image, type); - image.MarkAtomic(); - - MetaImage meta{image, {std::move(value)}}; - SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { - const auto offset = static_cast(image.index.Value()); - - const auto it = - std::find_if(std::begin(used_images), std::end(used_images), - [offset](const ImageEntry& entry) { return entry.offset == offset; }); - if (it != std::end(used_images)) { - ASSERT(!it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast(used_images.size()); - return used_images.emplace_back(next_index, offset, type); -} - -ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { - const Node image_register = GetRegister(reg); - const auto result = - TrackCbuf(image_register, global_code, static_cast(global_code.size())); - - const auto buffer = std::get<1>(result); - const auto offset = std::get<2>(result); - - const auto it = std::find_if(std::begin(used_images), std::end(used_images), - [buffer, offset](const ImageEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != std::end(used_images)) { - ASSERT(it->is_bindless && it->type == type); - return *it; - } - - const auto next_index = static_cast(used_images.size()); - return used_images.emplace_back(next_index, offset, buffer, type); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp deleted file mode 100644 index 59809bcd8..000000000 --- a/src/video_core/shader/decode/integer_set.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition - // is true, and to 0 otherwise. - const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); - const Node first_pred = - GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); - - const OperationCode combiner = GetPredicateCombiner(instr.iset.op); - - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); - const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp deleted file mode 100644 index 25e48fef8..000000000 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - const Node op_a = GetRegister(instr.gpr8); - - const Node op_b = [&]() { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - // We can't use the constant predicate as destination. - ASSERT(instr.isetp.pred3 != static_cast(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); - const Node predicate = - GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); - const Node value = Operation(combiner, predicate, second_pred); - SetPredicate(bb, instr.isetp.pred3, value); - - if (instr.isetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled - const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); - SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp deleted file mode 100644 index 7728f600e..000000000 --- a/src/video_core/shader/decode/memory.cpp +++ /dev/null @@ -1,493 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::AtomicOp; -using Tegra::Shader::AtomicType; -using Tegra::Shader::Attribute; -using Tegra::Shader::GlobalAtomicType; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::StoreType; - -namespace { - -OperationCode GetAtomOperation(AtomicOp op) { - switch (op) { - case AtomicOp::Add: - return OperationCode::AtomicIAdd; - case AtomicOp::Min: - return OperationCode::AtomicIMin; - case AtomicOp::Max: - return OperationCode::AtomicIMax; - case AtomicOp::And: - return OperationCode::AtomicIAnd; - case AtomicOp::Or: - return OperationCode::AtomicIOr; - case AtomicOp::Xor: - return OperationCode::AtomicIXor; - case AtomicOp::Exch: - return OperationCode::AtomicIExchange; - default: - UNIMPLEMENTED_MSG("op={}", op); - return OperationCode::AtomicIAdd; - } -} - -bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { - return uniform_type == Tegra::Shader::UniformType::UnsignedByte || - uniform_type == Tegra::Shader::UniformType::UnsignedShort; -} - -u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 0b11; - case Tegra::Shader::UniformType::UnsignedShort: - return 0b10; - default: - UNREACHABLE(); - return 0; - } -} - -u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { - switch (uniform_type) { - case Tegra::Shader::UniformType::UnsignedByte: - return 8; - case Tegra::Shader::UniformType::UnsignedShort: - return 16; - case Tegra::Shader::UniformType::Single: - return 32; - case Tegra::Shader::UniformType::Double: - return 64; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 128; - default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); - return 32; - } -} - -Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); -} - -Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), - Immediate(size)); -} - -Node Sign16Extend(Node value) { - Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); - Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); - Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); - return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::LD_A: { - // Note: Shouldn't this be interp mode flat? As in no interpolation made. - UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && - instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, - "Non-32 bits PHYS reads are not implemented"); - - const Node buffer{GetRegister(instr.gpr39)}; - - u64 next_element = instr.attribute.fmt20.element; - auto next_index = static_cast(instr.attribute.fmt20.index.Value()); - - const auto LoadNextElement = [&](u32 reg_offset) { - const Node attribute{instr.attribute.fmt20.IsPhysical() - ? GetPhysicalInputAttribute(instr.gpr8, buffer) - : GetInputAttribute(static_cast(next_index), - next_element, buffer)}; - - SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); - - // Load the next attribute element into the following register. If the element - // to load goes beyond the vec4 size, load the first element of the next - // attribute. - next_element = (next_element + 1) % 4; - next_index = next_index + (next_element == 0 ? 1 : 0); - }; - - const u32 num_words = static_cast(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - LoadNextElement(reg_offset); - } - break; - } - case OpCode::Id::LD_C: { - UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); - - Node index = GetRegister(instr.gpr8); - - const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - - switch (instr.ld_c.type.Value()) { - case Tegra::Shader::UniformType::Single: - SetRegister(bb, instr.gpr0, op_a); - break; - - case Tegra::Shader::UniformType::Double: { - const Node op_b = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); - - SetTemporary(bb, 0, op_a); - SetTemporary(bb, 1, op_b); - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); - } - break; - } - case OpCode::Id::LD_L: - LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); - [[fallthrough]]; - case OpCode::Id::LD_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate_offset = Immediate(static_cast(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); - }; - const auto GetMemory = [&](s32 offset) { - return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) - : GetLocalMemory(GetAddress(offset)); - }; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Signed16: - SetRegister(bb, instr.gpr0, - Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); - break; - case StoreType::Bits32: - case StoreType::Bits64: - case StoreType::Bits128: { - const u32 count = [&] { - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits32: - return 1; - case StoreType::Bits64: - return 2; - case StoreType::Bits128: - return 4; - default: - UNREACHABLE(); - return 0; - } - }(); - for (u32 i = 0; i < count; ++i) { - SetTemporary(bb, i, GetMemory(i * 4)); - } - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::LD: - case OpCode::Id::LDG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::LD: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); - return instr.generic.type; - case OpCode::Id::LDG: - return instr.ldg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, false); - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - if (!real_address_base || !base_address) { - // Tracking failed, load zeroes. - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); - } - break; - } - - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - Node gmem = MakeNode(real_address, base_address, descriptor); - - // To handle unaligned loads get the bytes used to dereference global memory and extract - // those bytes from the loaded u32. - if (IsUnaligned(type)) { - gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); - } - - SetTemporary(bb, i, gmem); - } - - for (u32 i = 0; i < count; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::ST_A: { - UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, - "Indirect attribute loads are not supported"); - UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, - "Unaligned attribute loads are not supported"); - - u64 element = instr.attribute.fmt20.element; - auto index = static_cast(instr.attribute.fmt20.index.Value()); - - const u32 num_words = static_cast(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - Node dest; - if (instr.attribute.fmt20.patch) { - const u32 offset = static_cast(index) * 4 + static_cast(element); - dest = MakeNode(offset); - } else { - dest = GetOutputAttribute(static_cast(index), element, - GetRegister(instr.gpr39)); - } - const auto src = GetRegister(instr.gpr0.Value() + reg_offset); - - bb.push_back(Operation(OperationCode::Assign, dest, src)); - - // Load the next attribute element into the following register. If the element to load - // goes beyond the vec4 size, load the first element of the next attribute. - element = (element + 1) % 4; - index = index + (element == 0 ? 1 : 0); - } - break; - } - case OpCode::Id::ST_L: - LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); - [[fallthrough]]; - case OpCode::Id::ST_S: { - const auto GetAddress = [&](s32 offset) { - ASSERT(offset % 4 == 0); - const Node immediate = Immediate(static_cast(instr.smem_imm) + offset); - return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); - }; - - const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; - const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; - const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; - - switch (instr.ldst_sl.type.Value()) { - case StoreType::Bits128: - (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); - (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); - [[fallthrough]]; - case StoreType::Bits64: - (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); - [[fallthrough]]; - case StoreType::Bits32: - (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); - break; - case StoreType::Unsigned16: - case StoreType::Signed16: { - Node address = GetAddress(0); - Node memory = (this->*get_memory)(address); - (this->*set_memory)( - bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); - break; - } - default: - UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), - instr.ldst_sl.type.Value()); - } - break; - } - case OpCode::Id::ST: - case OpCode::Id::STG: { - const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { - switch (opcode->get().GetId()) { - case OpCode::Id::ST: - UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); - return instr.generic.type; - case OpCode::Id::STG: - return instr.stg.type; - default: - UNREACHABLE(); - return {}; - } - }(); - - // For unaligned reads we have to read memory too. - const bool is_read = IsUnaligned(type); - const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, is_read, true); - if (!real_address_base || !base_address) { - // Tracking failed, skip the store. - break; - } - - const u32 size = GetMemorySize(type); - const u32 count = Common::AlignUp(size, 32) / 32; - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); - const Node gmem = MakeNode(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0.Value() + i); - - if (IsUnaligned(type)) { - const u32 mask = GetUnalignedMask(type); - value = InsertUnaligned(gmem, move(value), real_address, mask, size); - } - - bb.push_back(Operation(OperationCode::Assign, gmem, value)); - } - break; - } - case OpCode::Id::RED: { - UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", - instr.red.type.Value()); - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - Node gmem = MakeNode(real_address, base_address, descriptor); - Node value = GetRegister(instr.gpr0); - bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); - break; - } - case OpCode::Id::ATOM: { - UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || - instr.atom.operation == AtomicOp::Dec || - instr.atom.operation == AtomicOp::SafeAdd, - "operation={}", instr.atom.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || - instr.atom.type == GlobalAtomicType::U64 || - instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || - instr.atom.type == GlobalAtomicType::F32_FTZ_RN, - "type={}", instr.atom.type.Value()); - - const auto [real_address, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true, true); - if (!real_address || !base_address) { - // Tracking failed, skip atomic. - break; - } - - const bool is_signed = - instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; - Node gmem = MakeNode(real_address, base_address, descriptor); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, - GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::ATOMS: { - UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || - instr.atoms.operation == AtomicOp::Dec, - "operation={}", instr.atoms.operation.Value()); - UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || - instr.atoms.type == AtomicType::U64, - "type={}", instr.atoms.type.Value()); - const bool is_signed = - instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; - const s32 offset = instr.atoms.GetImmediateOffset(); - Node address = GetRegister(instr.gpr8); - address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); - SetRegister(bb, instr.gpr0, - SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, - GetSharedMemory(move(address)), GetRegister(instr.gpr20))); - break; - } - case OpCode::Id::AL2P: { - // Ignore al2p.direction since we don't care about it. - - // Calculate emulation fake physical address. - const Node fixed_address{Immediate(static_cast(instr.al2p.address))}; - const Node reg{GetRegister(instr.gpr8)}; - const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; - - // Set the fake address to target register. - SetRegister(bb, instr.gpr0, fake_address); - - // Signal the shader IR to declare all possible attributes and varyings - uses_physical_attributes = true; - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -std::tuple ShaderIR::TrackGlobalMemory(NodeBlock& bb, - Instruction instr, - bool is_read, bool is_write) { - const auto addr_register{GetRegister(instr.gmem.gpr)}; - const auto immediate_offset{static_cast(instr.gmem.offset)}; - - const auto [base_address, index, offset] = - TrackCbuf(addr_register, global_code, static_cast(global_code.size())); - ASSERT_OR_EXECUTE_MSG( - base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, - "Global memory tracking failed"); - - bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); - - const GlobalMemoryBase descriptor{index, offset}; - const auto& entry = used_global_memory.try_emplace(descriptor).first; - auto& usage = entry->second; - usage.is_written |= is_write; - usage.is_read |= is_read; - - const auto real_address = - Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); - - return {real_address, base_address, descriptor}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp deleted file mode 100644 index 5f88537bc..000000000 --- a/src/video_core/shader/decode/other.cpp +++ /dev/null @@ -1,322 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::OpCode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::SystemVariable; - -using Index = Tegra::Shader::Attribute::Index; - -u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::NOP: { - UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); - UNIMPLEMENTED_IF(instr.nop.trigger != 0); - // With the previous preconditions, this instruction is a no-operation. - break; - } - case OpCode::Id::EXIT: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); - - switch (instr.flow.cond) { - case Tegra::Shader::FlowCondition::Always: - bb.push_back(Operation(OperationCode::Exit)); - if (instr.pred.pred_index == static_cast(Pred::UnusedIndex)) { - // If this is an unconditional exit then just end processing here, - // otherwise we have to account for the possibility of the condition - // not being met, so continue processing the next instruction. - pc = MAX_PROGRAM_LENGTH - 1; - } - break; - - case Tegra::Shader::FlowCondition::Fcsm_Tr: - // TODO(bunnei): What is this used for? If we assume this conditon is not - // satisifed, dual vertex shaders in Farming Simulator make more sense - UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); - break; - - default: - UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); - } - break; - } - case OpCode::Id::KIL: { - UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); - - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); - - bb.push_back(Operation(OperationCode::Discard)); - break; - } - case OpCode::Id::S2R: { - const Node value = [this, instr] { - switch (instr.sys20) { - case SystemVariable::LaneId: - return Operation(OperationCode::ThreadId); - case SystemVariable::InvocationId: - return Operation(OperationCode::InvocationId); - case SystemVariable::Ydirection: - uses_y_negate = true; - return Operation(OperationCode::YNegate); - case SystemVariable::InvocationInfo: - LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); - return Immediate(0x00ff'0000U); - case SystemVariable::WscaleFactorXY: - UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); - return Immediate(0U); - case SystemVariable::WscaleFactorZ: - UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); - return Immediate(0U); - case SystemVariable::Tid: { - Node val = Immediate(0); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); - val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); - return val; - } - case SystemVariable::TidX: - return Operation(OperationCode::LocalInvocationIdX); - case SystemVariable::TidY: - return Operation(OperationCode::LocalInvocationIdY); - case SystemVariable::TidZ: - return Operation(OperationCode::LocalInvocationIdZ); - case SystemVariable::CtaIdX: - return Operation(OperationCode::WorkGroupIdX); - case SystemVariable::CtaIdY: - return Operation(OperationCode::WorkGroupIdY); - case SystemVariable::CtaIdZ: - return Operation(OperationCode::WorkGroupIdZ); - case SystemVariable::EqMask: - case SystemVariable::LtMask: - case SystemVariable::LeMask: - case SystemVariable::GtMask: - case SystemVariable::GeMask: - uses_warps = true; - switch (instr.sys20) { - case SystemVariable::EqMask: - return Operation(OperationCode::ThreadEqMask); - case SystemVariable::LtMask: - return Operation(OperationCode::ThreadLtMask); - case SystemVariable::LeMask: - return Operation(OperationCode::ThreadLeMask); - case SystemVariable::GtMask: - return Operation(OperationCode::ThreadGtMask); - case SystemVariable::GeMask: - return Operation(OperationCode::ThreadGeMask); - default: - UNREACHABLE(); - return Immediate(0u); - } - default: - UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); - return Immediate(0u); - } - }(); - SetRegister(bb, instr.gpr0, value); - - break; - } - case OpCode::Id::BRA: { - Node branch; - if (instr.bra.constant_buffer == 0) { - const u32 target = pc + instr.bra.GetBranchTarget(); - branch = Operation(OperationCode::Branch, Immediate(target)); - } else { - const u32 target = pc + 1; - const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - const Node operand = - Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - branch = Operation(OperationCode::BranchIndirect, operand); - } - - const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; - if (cc != Tegra::Shader::ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::BRX: { - Node operand; - if (instr.brx.constant_buffer != 0) { - const s32 target = pc + 1; - const Node index = GetRegister(instr.gpr8); - const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } else { - const s32 target = pc + instr.brx.GetBranchExtend(); - const Node op_a = GetRegister(instr.gpr8); - const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, - PRECISE, op_a, Immediate(3)); - operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); - } - const Node branch = Operation(OperationCode::BranchIndirect, operand); - - const ConditionCode cc = instr.flow_condition_code; - if (cc != ConditionCode::T) { - bb.push_back(Conditional(GetConditionCode(cc), {branch})); - } else { - bb.push_back(branch); - } - break; - } - case OpCode::Id::SSY: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer flow is not supported"); - - if (disable_flow_stack) { - break; - } - - // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); - break; - } - case OpCode::Id::PBK: { - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer PBK is not supported"); - - if (disable_flow_stack) { - break; - } - - // PBK pushes to a stack the address where BRK will jump to. - const u32 target = pc + instr.bra.GetBranchTarget(); - bb.push_back( - Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); - break; - } - case OpCode::Id::SYNC: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); - - if (decompiled) { - break; - } - - // The SYNC opcode jumps to the address previously set by the SSY opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); - break; - } - case OpCode::Id::BRK: { - const ConditionCode cc = instr.flow_condition_code; - UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); - if (decompiled) { - break; - } - - // The BRK opcode jumps to the address previously set by the PBK opcode - bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); - break; - } - case OpCode::Id::IPA: { - const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; - const auto attribute = instr.attribute.fmt28; - const Index index = attribute.index; - - Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) - : GetInputAttribute(index, attribute.element); - - // Code taken from Ryujinx. - if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { - const u32 location = static_cast(index) - static_cast(Index::Attribute_0); - if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { - Node position_w = GetInputAttribute(Index::Position, 3); - value = Operation(OperationCode::FMul, move(value), move(position_w)); - } - } - - if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { - value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); - } - - value = GetSaturatedFloat(move(value), instr.ipa.saturate); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::OUT_R: { - UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, - "Stream buffer is not supported"); - - if (instr.out.emit) { - // gpr0 is used to store the next address and gpr8 contains the address to emit. - // Hardware uses pointers here but we just ignore it - bb.push_back(Operation(OperationCode::EmitVertex)); - SetRegister(bb, instr.gpr0, Immediate(0)); - } - if (instr.out.cut) { - bb.push_back(Operation(OperationCode::EndPrimitive)); - } - break; - } - case OpCode::Id::ISBERD: { - UNIMPLEMENTED_IF(instr.isberd.o != 0); - UNIMPLEMENTED_IF(instr.isberd.skew != 0); - UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); - UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); - LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); - SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); - break; - } - case OpCode::Id::BAR: { - UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); - bb.push_back(Operation(OperationCode::Barrier)); - break; - } - case OpCode::Id::MEMBAR: { - UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); - const OperationCode type = [instr] { - switch (instr.membar.type) { - case Tegra::Shader::MembarType::CTA: - return OperationCode::MemoryBarrierGroup; - case Tegra::Shader::MembarType::GL: - return OperationCode::MemoryBarrierGlobal; - default: - UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); - return OperationCode::MemoryBarrierGlobal; - } - }(); - bb.push_back(Operation(type)); - break; - } - case OpCode::Id::DEPBAR: { - LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp deleted file mode 100644 index 9290d22eb..000000000 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; - -u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - switch (opcode->get().GetId()) { - case OpCode::Id::PSETP: { - const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); - - // We can't use the constant predicate as destination. - ASSERT(instr.psetp.pred3 != static_cast(Pred::UnusedIndex)); - - const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); - const Node predicate = Operation(combiner, op_a, op_b); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); - - if (instr.psetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if - // enabled - SetPredicate(bb, instr.psetp.pred0, - Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), - second_pred)); - } - break; - } - case OpCode::Id::CSETP: { - const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); - const Node condition_code = GetConditionCode(instr.csetp.cc); - - const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); - - if (instr.csetp.pred3 != static_cast(Pred::UnusedIndex)) { - SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); - } - if (instr.csetp.pred0 != static_cast(Pred::UnusedIndex)) { - const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); - SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp deleted file mode 100644 index 84dbc50fe..000000000 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in PSET is not implemented"); - - const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); - const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); - const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); - - const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); - - const OperationCode combiner = GetPredicateCombiner(instr.pset.op); - const Node predicate = Operation(combiner, first_pred, second_pred); - - const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); - const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); - const Node value = - Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); - - if (instr.pset.bf) { - SetInternalFlagsFromFloat(bb, value, instr.generates_cc); - } else { - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - } - SetRegister(bb, instr.gpr0, value); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp deleted file mode 100644 index 6116c31aa..000000000 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; - -namespace { -constexpr u64 NUM_CONDITION_CODES = 4; -constexpr u64 NUM_PREDICATES = 7; -} // namespace - -u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node apply_mask = [this, opcode, instr] { - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: - case OpCode::Id::P2R_IMM: - return Immediate(static_cast(instr.p2r_r2p.immediate_mask)); - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - const u32 offset = static_cast(instr.p2r_r2p.byte) * 8; - - const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; - const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; - const auto get_entry = [this, cc](u64 entry) { - return cc ? GetInternalFlag(static_cast(entry)) : GetPredicate(entry); - }; - - switch (opcode->get().GetId()) { - case OpCode::Id::R2P_IMM: { - Node mask = GetRegister(instr.gpr8); - - for (u64 entry = 0; entry < num_entries; ++entry) { - const u32 shift = static_cast(entry); - - Node apply = BitfieldExtract(apply_mask, shift, 1); - Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); - - Node compare = BitfieldExtract(mask, offset + shift, 1); - Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); - - Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); - bb.push_back(Conditional(condition, {move(code)})); - } - break; - } - case OpCode::Id::P2R_IMM: { - Node value = Immediate(0); - for (u64 entry = 0; entry < num_entries; ++entry) { - Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), - Immediate(0)); - value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); - } - value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); - value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp deleted file mode 100644 index a53819c15..000000000 --- a/src/video_core/shader/decode/shift.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::ShfType; -using Tegra::Shader::ShfXmode; - -namespace { - -Node IsFull(Node shift) { - return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); -} - -Node Shift(OperationCode opcode, Node value, Node shift) { - Node shifted = Operation(opcode, move(value), shift); - return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); -} - -Node ClampShift(Node shift, s32 size = 32) { - shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); - return Operation(OperationCode::IMin, move(shift), Immediate(size)); -} - -Node WrapShift(Node shift, s32 size = 32) { - return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); -} - -Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); - Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); - } - - // And these when it's larger than or 32 - const bool is_signed = type == ShfType::S64; - const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(opcode, high, move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { - // These values are used when the shift value is less than 32 - Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); - Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); - Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); - - if (type == ShfType::Bits32) { - // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits - return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); - } - - // And these when it's larger than or 32 - Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); - Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); - - Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); - Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); - - Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); - return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = [this, instr] { - if (instr.is_b_imm) { - return Immediate(instr.alu.GetSignedImm20_20()); - } else if (instr.is_b_gpr) { - return GetRegister(instr.gpr20); - } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); - } - }(); - - switch (const auto opid = opcode->get().GetId(); opid) { - case OpCode::Id::SHR_C: - case OpCode::Id::SHR_R: - case OpCode::Id::SHR_IMM: { - op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); - - Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, - move(op_a), move(op_b)); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHL_C: - case OpCode::Id::SHL_R: - case OpCode::Id::SHL_IMM: { - Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, move(value)); - break; - } - case OpCode::Id::SHF_RIGHT_R: - case OpCode::Id::SHF_RIGHT_IMM: - case OpCode::Id::SHF_LEFT_R: - case OpCode::Id::SHF_LEFT_IMM: { - UNIMPLEMENTED_IF(instr.generates_cc); - UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", - instr.shf.xmode.Value()); - - if (instr.is_b_imm) { - op_b = Immediate(static_cast(instr.shf.immediate)); - } - const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; - Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); - - Node negated_shift = Operation(OperationCode::INegate, shift); - Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); - - const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; - Node value = (is_right ? ShiftRight : ShiftLeft)( - move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); - - SetRegister(bb, instr.gpr0, move(value)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp deleted file mode 100644 index c69681e8d..000000000 --- a/src/video_core/shader/decode/texture.cpp +++ /dev/null @@ -1,935 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Register; -using Tegra::Shader::TextureMiscMode; -using Tegra::Shader::TextureProcessMode; -using Tegra::Shader::TextureType; - -static std::size_t GetCoordCount(TextureType texture_type) { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::Texture3D: - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); - return 0; - } -} - -u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - bool is_bindless = false; - switch (opcode->get().GetId()) { - case OpCode::Id::TEX: { - const TextureType texture_type{instr.tex.texture_type}; - const bool is_array = instr.tex.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex.GetTextureProcessMode(); - WriteTexInstructionFloat( - bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); - break; - } - case OpCode::Id::TEX_B: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const TextureType texture_type{instr.tex_b.texture_type}; - const bool is_array = instr.tex_b.array != 0; - const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); - const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex_b.GetTextureProcessMode(); - WriteTexInstructionFloat(bb, instr, - GetTexCode(instr, texture_type, process_mode, depth_compare, - is_array, is_aoffi, {instr.gpr20})); - break; - } - case OpCode::Id::TEXS: { - const TextureType texture_type{instr.texs.GetTextureType()}; - const bool is_array{instr.texs.IsArrayTexture()}; - const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.texs.GetTextureProcessMode(); - - const Node4 components = - GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); - - if (instr.texs.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - case OpCode::Id::TLD4_B: { - is_bindless = true; - [[fallthrough]]; - } - case OpCode::Id::TLD4: { - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), - "NDV is not implemented"); - const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) - : instr.tld4.UsesMiscMode(TextureMiscMode::DC); - const bool is_array = instr.tld4.array != 0; - const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) - : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) - : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, - is_ptp, is_bindless)); - break; - } - case OpCode::Id::TLD4S: { - constexpr std::size_t num_coords = 2; - const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); - const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = GetRegister(instr.gpr20); - - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. - std::vector coords; - std::vector aoffi; - Node depth_compare; - if (is_depth_compare) { - // Note: TLD4S coordinate encoding works just like TEXS's - const Node op_y = GetRegister(instr.gpr8.Value() + 1); - coords.push_back(op_a); - coords.push_back(op_y); - if (is_aoffi) { - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - depth_compare = GetRegister(instr.gpr20.Value() + 1); - } else { - depth_compare = op_b; - } - } else { - // There's no depth compare - coords.push_back(op_a); - if (is_aoffi) { - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - aoffi = GetAoffiCoordinates(op_b, num_coords, true); - } else { - coords.push_back(op_b); - } - } - const Node component = Immediate(static_cast(instr.tld4s.component)); - - SamplerInfo info; - info.is_shadow = is_depth_compare; - const std::optional sampler = GetSampler(instr.sampler, info); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {}, - {}, {}, component, element, {}}; - values[element] = Operation(OperationCode::TextureGather, meta, coords); - } - - if (instr.tld4s.fp16_flag) { - WriteTexsInstructionHalfFloat(bb, instr, values, true); - } else { - WriteTexsInstructionFloat(bb, instr, values, true); - } - break; - } - case OpCode::Id::TXD_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXD: { - UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - const bool is_array = instr.txd.is_array != 0; - const auto derivate_reg = instr.gpr20.Value(); - const auto texture_type = instr.txd.texture_type.Value(); - const auto coord_count = GetCoordCount(texture_type); - u64 base_reg = instr.gpr8.Value(); - Node index_var; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - const std::optional sampler = - is_bindless ? GetBindlessSampler(base_reg, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); - WriteTexInstructionFloat(bb, instr, values); - break; - } - - if (is_bindless) { - base_reg++; - } - - std::vector coords; - std::vector derivates; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(base_reg + i)); - const std::size_t derivate = i * 2; - derivates.push_back(GetRegister(derivate_reg + derivate)); - derivates.push_back(GetRegister(derivate_reg + derivate + 1)); - } - - Node array_node = {}; - if (is_array) { - const Node info_reg = GetRegister(base_reg + coord_count); - array_node = BitfieldExtract(info_reg, 0, 16); - } - - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, - {}, {}, {}, element, index_var}; - values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); - } - - WriteTexInstructionFloat(bb, instr, values); - - break; - } - case OpCode::Id::TXQ_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TXQ: { - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) - : GetSampler(instr.sampler, {}); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - u32 indexer = 0; - switch (instr.txq.query_type) { - case Tegra::Shader::TextureQueryType::Dimension: { - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, - GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); - } - break; - } - case OpCode::Id::TMML_B: - is_bindless = true; - [[fallthrough]]; - case OpCode::Id::TMML: { - UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), - "NDV is not implemented"); - - const auto texture_type = instr.tmml.texture_type.Value(); - const bool is_array = instr.tmml.array != 0; - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) - : GetSampler(instr.sampler, info); - - if (!sampler) { - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - const Node value = Immediate(0); - SetTemporary(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - - const u64 base_index = is_array ? 1 : 0; - const u64 num_components = [texture_type] { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); - return 2; - } - }(); - // TODO: What's the array component used for? - - std::vector coords; - coords.reserve(num_components); - for (u64 component = 0; component < num_components; ++component) { - coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); - } - - u32 indexer = 0; - for (u32 element = 0; element < 2; ++element) { - if (!instr.tmml.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - Node value = Operation(OperationCode::TextureQueryLod, meta, coords); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } - break; - } - case OpCode::Id::TLD: { - UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); - - WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); - break; - } - case OpCode::Id::TLDS: { - const TextureType texture_type{instr.tlds.GetTextureType()}; - const bool is_array{instr.tlds.IsArrayTexture()}; - - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - - const Node4 components = GetTldsCode(instr, texture_type, is_array); - - if (instr.tlds.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( - SamplerInfo info, std::optional sampler) { - if (info.IsComplete()) { - return info; - } - if (!sampler) { - LOG_WARNING(HW_GPU, "Unknown sampler info"); - info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); - info.is_array = info.is_array.value_or(false); - info.is_shadow = info.is_shadow.value_or(false); - info.is_buffer = info.is_buffer.value_or(false); - return info; - } - info.type = info.type.value_or(sampler->texture_type); - info.is_array = info.is_array.value_or(sampler->is_array != 0); - info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); - info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); - return info; -} - -std::optional ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, - SamplerInfo sampler_info) { - const u32 offset = static_cast(sampler.index.Value()); - const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); - - // If this sampler has already been used, return the existing mapping. - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [offset](const SamplerEntry& entry) { return entry.offset == offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); -} - -std::optional ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, - SamplerInfo info, Node& index_var) { - const Node sampler_register = GetRegister(reg); - const auto [base_node, tracked_sampler_info] = - TrackBindlessSampler(sampler_register, global_code, static_cast(global_code.size())); - if (!base_node) { - UNREACHABLE(); - return std::nullopt; - } - - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const u32 buffer = sampler_info->index; - const u32 offset = sampler_info->offset; - info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); - - // If this sampler has already been used, return the existing mapping. - const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer, offset](const SamplerEntry& entry) { - return entry.buffer == buffer && entry.offset == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow); - return *it; - } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, false); - } - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const std::pair indices = sampler_info->indices; - const std::pair offsets = sampler_info->offsets; - info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); - - // Try to use an already created sampler if it exists - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [indices, offsets](const SamplerEntry& entry) { - return offsets == std::pair{entry.offset, entry.secondary_offset} && - indices == std::pair{entry.buffer, entry.secondary_buffer}; - }); - if (it != used_samplers.end()) { - ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); - return *it; - } - - // Otherwise create a new mapping for this sampler - const u32 next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer); - } - if (const auto sampler_info = std::get_if(&*tracked_sampler_info)) { - const u32 base_offset = sampler_info->base_offset / 4; - index_var = GetCustomVariable(sampler_info->bindless_var); - info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); - - // If this sampler has already been used, return the existing mapping. - const auto it = std::find_if( - used_samplers.begin(), used_samplers.end(), - [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); - if (it != used_samplers.end()) { - ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && - it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && - it->is_indexed); - return *it; - } - - uses_indexed_samplers = true; - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array, - *info.is_shadow, *info.is_buffer, true); - } - return std::nullopt; -} - -void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { - u32 dest_elem = 0; - for (u32 elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - SetTemporary(bb, dest_elem++, components[elem]); - } - // After writing values in temporals, move them to the real registers - for (u32 i = 0; i < dest_elem; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); - } -} - -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, - bool ignore_mask) { - // TEXS has two destination registers and a swizzle. The first two elements in the swizzle - // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 - - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - SetTemporary(bb, dest_elem++, components[component]); - } - - for (u32 i = 0; i < dest_elem; ++i) { - if (i < 2) { - // Write the first two swizzle components to gpr0 and gpr0+1 - SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); - } else { - ASSERT(instr.texs.HasTwoDestinations()); - // Write the rest of the swizzle components to gpr28 and gpr28+1 - SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); - } - } -} - -void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, - const Node4& components, bool ignore_mask) { - // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half - // float instruction). - - Node4 values; - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) - continue; - values[dest_elem++] = components[component]; - } - if (dest_elem == 0) - return; - - std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); - - const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); - if (dest_elem <= 2) { - SetRegister(bb, instr.gpr0, first_value); - return; - } - - SetTemporary(bb, 0, first_value); - SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); - - SetRegister(bb, instr.gpr0, GetTemporary(0)); - SetRegister(bb, instr.gpr28, GetTemporary(1)); -} - -Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, - std::vector aoffi, - std::optional bindless_reg) { - const bool is_array = array != nullptr; - const bool is_shadow = depth_compare != nullptr; - const bool is_bindless = bindless_reg.has_value(); - - ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, - "Illegal texture type"); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = is_shadow; - info.is_buffer = false; - - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) - : GetSampler(instr.sampler, info); - if (!sampler) { - return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; - } - - const bool lod_needed = process_mode == TextureProcessMode::LZ || - process_mode == TextureProcessMode::LL || - process_mode == TextureProcessMode::LLA; - const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; - - Node bias; - Node lod; - switch (process_mode) { - case TextureProcessMode::None: - break; - case TextureProcessMode::LZ: - lod = Immediate(0.0f); - break; - case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 field with - // an offset depending on the usage of the other registers. - bias = GetRegister(instr.gpr20.Value() + bias_offset); - break; - case TextureProcessMode::LL: - lod = GetRegister(instr.gpr20.Value() + bias_offset); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); - break; - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, - lod, {}, element, index_var}; - values[element] = Operation(opcode, meta, coords); - } - - return values; -} - -Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array, - bool is_aoffi, std::optional bindless_reg) { - const bool lod_bias_enabled{ - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; - - const bool is_bindless = bindless_reg.has_value(); - - u64 parameter_register = instr.gpr20.Value(); - if (is_bindless) { - ++parameter_register; - } - - const u32 bias_lod_offset = (is_bindless ? 1 : 0); - if (lod_bias_enabled) { - ++parameter_register; - } - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 5); - const auto coord_count = std::get<0>(coord_counts); - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - // 1D.DC in OpenGL the 2nd component is ignored. - if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { - coords.push_back(Immediate(0.0f)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - std::vector aoffi; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); - } - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - dc = GetRegister(parameter_register++); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, - aoffi, bindless_reg); -} - -Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - - const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, - lod_bias_enabled, 4, 4); - const auto coord_count = std::get<0>(coord_counts); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - const u64 last_coord_register = - (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) - ? static_cast(instr.gpr20.Value()) - : coord_register + 1; - const u32 bias_offset = coord_count > 2 ? 1 : 0; - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - const bool last = (i == (coord_count - 1)) && (coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - Node dc; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - dc = GetRegister(depth_register); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, - {}); -} - -Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { - ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); - - const std::size_t coord_count = GetCoordCount(texture_type); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - - u64 parameter_register = instr.gpr20.Value(); - - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = depth_compare; - - Node index_var; - const std::optional sampler = - is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) - : GetSampler(instr.sampler, info); - Node4 values; - if (!sampler) { - for (u32 element = 0; element < values.size(); ++element) { - values[element] = Immediate(0); - } - return values; - } - - std::vector aoffi, ptp; - if (is_aoffi) { - aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); - } else if (is_ptp) { - ptp = GetPtpCoordinates( - {GetRegister(parameter_register++), GetRegister(parameter_register++)}); - } - - Node dc; - if (depth_compare) { - dc = GetRegister(parameter_register++); - } - - const Node component = is_bindless ? Immediate(static_cast(instr.tld4_b.component)) - : Immediate(static_cast(instr.tld4.component)); - - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, - index_var}; - values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { - const auto texture_type{instr.tld.texture_type}; - const bool is_array{instr.tld.is_array != 0}; - const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; - const std::size_t coord_count{GetCoordCount(texture_type)}; - - u64 gpr8_cursor{instr.gpr8.Value()}; - const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; - - std::vector coords; - coords.reserve(coord_count); - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(gpr8_cursor++)); - } - - u64 gpr20_cursor{instr.gpr20.Value()}; - // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; - const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; - // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; - // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - - const std::optional sampler = GetSampler(instr.sampler, {}); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { - SamplerInfo info; - info.type = texture_type; - info.is_array = is_array; - info.is_shadow = false; - const std::optional sampler = GetSampler(instr.sampler, info); - - const std::size_t type_coord_count = GetCoordCount(texture_type); - const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; - const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // if is array gpr20 is used - const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); - - const u64 last_coord_register = - ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array - ? static_cast(instr.gpr20.Value()) - : coord_register + 1; - - std::vector coords; - for (std::size_t i = 0; i < type_coord_count; ++i) { - const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - coords.push_back( - GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - // When lod is used always is in gpr20 - const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - - std::vector aoffi; - if (aoffi_enabled) { - aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - return values; -} - -std::tuple ShaderIR::ValidateAndGetCoordinateElement( - TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, - std::size_t max_coords, std::size_t max_inputs) { - const std::size_t coord_count = GetCoordCount(texture_type); - - std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); - if (total_coord_count > max_coords || total_reg_count > max_inputs) { - UNIMPLEMENTED_MSG("Unsupported Texture operation"); - total_coord_count = std::min(total_coord_count, max_coords); - } - // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. - total_coord_count += - (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; - - return {coord_count, total_coord_count}; -} - -std::vector ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, - bool is_tld4) { - const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; - const u32 size = is_tld4 ? 6 : 4; - const s32 wrap_value = is_tld4 ? 32 : 8; - const s32 diff_value = is_tld4 ? 64 : 16; - const u32 mask = (1U << size) - 1; - - std::vector aoffi; - aoffi.reserve(coord_count); - - const auto aoffi_immediate{ - TrackImmediate(aoffi_reg, global_code, static_cast(global_code.size()))}; - if (!aoffi_immediate) { - // Variable access, not supported on AMD. - LOG_WARNING(HW_GPU, - "AOFFI constant folding failed, some hardware might have graphical issues"); - for (std::size_t coord = 0; coord < coord_count; ++coord) { - const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); - aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return aoffi; - } - - for (std::size_t coord = 0; coord < coord_count; ++coord) { - s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; - if (value >= wrap_value) { - value -= diff_value; - } - aoffi.push_back(Immediate(value)); - } - return aoffi; -} - -std::vector ShaderIR::GetPtpCoordinates(std::array ptp_regs) { - static constexpr u32 num_entries = 8; - - std::vector ptp; - ptp.reserve(num_entries); - - const auto global_size = static_cast(global_code.size()); - const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); - const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); - if (!low || !high) { - for (u32 entry = 0; entry < num_entries; ++entry) { - const u32 reg = entry / 4; - const u32 offset = entry % 4; - const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); - const Node condition = - Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); - const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); - ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); - } - return ptp; - } - - const u64 immediate = (static_cast(*high) << 32) | static_cast(*low); - for (u32 entry = 0; entry < num_entries; ++entry) { - s32 value = (immediate >> (entry * 8)) & 0b111111; - if (value >= 32) { - value -= 64; - } - ptp.push_back(Immediate(value)); - } - - return ptp; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp deleted file mode 100644 index 1c0957277..000000000 --- a/src/video_core/shader/decode/video.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using std::move; -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::VideoType; -using Tegra::Shader::VmadShr; -using Tegra::Shader::VmnmxOperation; -using Tegra::Shader::VmnmxType; - -u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - if (opcode->get().GetId() == OpCode::Id::VMNMX) { - DecodeVMNMX(bb, instr); - return pc; - } - - const Node op_a = - GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, - instr.video.type_a, instr.video.byte_height_a); - const Node op_b = [this, instr] { - if (instr.video.use_register_b) { - return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, - instr.video.signed_b, instr.video.type_b, - instr.video.byte_height_b); - } - if (instr.video.signed_b) { - const auto imm = static_cast(instr.alu.GetImm20_16()); - return Immediate(static_cast(imm)); - } else { - return Immediate(instr.alu.GetImm20_16()); - } - }(); - - switch (opcode->get().GetId()) { - case OpCode::Id::VMAD: { - const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node op_c = GetRegister(instr.gpr39); - - Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); - value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c); - - if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { - const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); - value = - SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); - } - - SetInternalFlagsFromInteger(bb, value, instr.generates_cc); - SetRegister(bb, instr.gpr0, value); - break; - } - case OpCode::Id::VSETP: { - // We can't use the constant predicate as destination. - ASSERT(instr.vsetp.pred3 != static_cast(Pred::UnusedIndex)); - - const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; - const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); - const Node second_pred = GetPredicate(instr.vsetp.pred39, false); - - const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); - - // Set the primary predicate to the result of Predicate OP SecondPredicate - SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); - - if (instr.vsetp.pred0 != static_cast(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, - // if enabled - const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); - SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); - } - - return pc; -} - -Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, - u64 byte_height) { - if (!is_chunk) { - return BitfieldExtract(op, static_cast(byte_height * 8), 8); - } - - switch (type) { - case VideoType::Size16_Low: - return BitfieldExtract(op, 0, 16); - case VideoType::Size16_High: - return BitfieldExtract(op, 16, 16); - case VideoType::Size32: - // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used - // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. - UNIMPLEMENTED(); - return Immediate(0); - case VideoType::Invalid: - UNREACHABLE_MSG("Invalid instruction encoding"); - return Immediate(0); - default: - UNREACHABLE(); - return Immediate(0); - } -} - -void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { - UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); - UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); - UNIMPLEMENTED_IF(instr.vmnmx.sat); - UNIMPLEMENTED_IF(instr.generates_cc); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node op_c = GetRegister(instr.gpr39); - - const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed - const bool is_oper2_signed = instr.vmnmx.is_dest_signed; - - const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin; - Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); - - switch (instr.vmnmx.operation) { - case VmnmxOperation::Mrg_16H: - value = BitfieldInsert(move(op_c), move(value), 16, 16); - break; - case VmnmxOperation::Mrg_16L: - value = BitfieldInsert(move(op_c), move(value), 0, 16); - break; - case VmnmxOperation::Mrg_8B0: - value = BitfieldInsert(move(op_c), move(value), 0, 8); - break; - case VmnmxOperation::Mrg_8B2: - value = BitfieldInsert(move(op_c), move(value), 16, 8); - break; - case VmnmxOperation::Acc: - value = Operation(OperationCode::IAdd, move(value), move(op_c)); - break; - case VmnmxOperation::Min: - value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Max: - value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); - break; - case VmnmxOperation::Nop: - break; - default: - UNREACHABLE(); - break; - } - - SetRegister(bb, instr.gpr0, move(value)); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp deleted file mode 100644 index 37433d783..000000000 --- a/src/video_core/shader/decode/warp.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::Pred; -using Tegra::Shader::ShuffleOperation; -using Tegra::Shader::VoteOperation; - -namespace { - -OperationCode GetOperationCode(VoteOperation vote_op) { - switch (vote_op) { - case VoteOperation::All: - return OperationCode::VoteAll; - case VoteOperation::Any: - return OperationCode::VoteAny; - case VoteOperation::Eq: - return OperationCode::VoteEqual; - default: - UNREACHABLE_MSG("Invalid vote operation={}", vote_op); - return OperationCode::VoteAll; - } -} - -} // Anonymous namespace - -u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - // Signal the backend that this shader uses warp instructions. - uses_warps = true; - - switch (opcode->get().GetId()) { - case OpCode::Id::VOTE: { - const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); - const Node active = Operation(OperationCode::BallotThread, value); - const Node vote = Operation(GetOperationCode(instr.vote.operation), value); - SetRegister(bb, instr.gpr0, active); - SetPredicate(bb, instr.vote.dest_pred, vote); - break; - } - case OpCode::Id::SHFL: { - Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - Node index = instr.shfl.is_index_imm ? Immediate(static_cast(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - - Node thread_id = Operation(OperationCode::ThreadId); - Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); - Node seg_mask = BitfieldExtract(mask, 8, 16); - - Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); - Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); - Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, - Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); - - Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { - switch (instr.shfl.operation) { - case ShuffleOperation::Idx: - return Operation(OperationCode::IBitwiseOr, - Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), - min_thread_id); - case ShuffleOperation::Down: - return Operation(OperationCode::IAdd, thread_id, index); - case ShuffleOperation::Up: - return Operation(OperationCode::IAdd, thread_id, - Operation(OperationCode::INegate, index)); - case ShuffleOperation::Bfly: - return Operation(OperationCode::IBitwiseXor, thread_id, index); - } - UNREACHABLE(); - return Immediate(0U); - }(); - - Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); - } else { - return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); - } - }(); - - SetPredicate(bb, instr.shfl.pred48, in_bounds); - SetRegister( - bb, instr.gpr0, - Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); - break; - } - case OpCode::Id::FSWZADD: { - UNIMPLEMENTED_IF(instr.fswzadd.ndv); - - Node op_a = GetRegister(instr.gpr8); - Node op_b = GetRegister(instr.gpr20); - Node mask = Immediate(static_cast(instr.fswzadd.swizzle)); - SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); - break; - } - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp deleted file mode 100644 index 233b8fa42..000000000 --- a/src/video_core/shader/decode/xmad.cpp +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Instruction; -using Tegra::Shader::OpCode; -using Tegra::Shader::PredCondition; - -u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { - const Instruction instr = {program_code[pc]}; - const auto opcode = OpCode::Decode(instr); - - UNIMPLEMENTED_IF(instr.xmad.sign_a); - UNIMPLEMENTED_IF(instr.xmad.sign_b); - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in XMAD is not implemented"); - - Node op_a = GetRegister(instr.gpr8); - - // TODO(bunnei): Needs to be fixed once op_a or op_b is signed - UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); - const bool is_signed_a = instr.xmad.sign_a == 1; - const bool is_signed_b = instr.xmad.sign_b == 1; - const bool is_signed_c = is_signed_a; - - auto [is_merge, is_psl, is_high_b, mode, op_b_binding, - op_c] = [&]() -> std::tuple { - switch (opcode->get().GetId()) { - case OpCode::Id::XMAD_CR: - return {instr.xmad.merge_56, - instr.xmad.product_shift_left_second, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), - GetRegister(instr.gpr39)}; - case OpCode::Id::XMAD_RR: - return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, - instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; - case OpCode::Id::XMAD_RC: - return {false, - false, - instr.xmad.high_b, - instr.xmad.mode_cbf, - GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; - case OpCode::Id::XMAD_IMM: - return {instr.xmad.merge_37, - instr.xmad.product_shift_left, - false, - instr.xmad.mode, - Immediate(static_cast(instr.xmad.imm20_16)), - GetRegister(instr.gpr39)}; - default: - UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); - return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; - } - }(); - - op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), - instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); - - const Node original_b = op_b_binding; - const Node op_b = - SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), - is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); - - // we already check sign_a and sign_b is difference or not before so just use one in here. - Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); - if (is_psl) { - product = - SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); - } - SetTemporary(bb, 0, product); - product = GetTemporary(0); - - Node original_c = op_c; - const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error - op_c = [&] { - switch (set_mode) { - case Tegra::Shader::XmadMode::None: - return original_c; - case Tegra::Shader::XmadMode::CLo: - return BitfieldExtract(std::move(original_c), 0, 16); - case Tegra::Shader::XmadMode::CHi: - return BitfieldExtract(std::move(original_c), 16, 16); - case Tegra::Shader::XmadMode::CBcc: { - Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, - original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), - std::move(shifted_b)); - } - case Tegra::Shader::XmadMode::CSfu: { - const Node comp_a = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); - const Node comp_b = - GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); - const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); - - const Node comp_minus_a = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_a, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, - Immediate(0x80000000)), - Immediate(0)); - const Node comp_minus_b = GetPredicateComparisonInteger( - PredCondition::NE, is_signed_b, - SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, - Immediate(0x80000000)), - Immediate(0)); - - Node new_c = Operation( - OperationCode::Select, comp_minus_a, - SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), - original_c); - new_c = Operation( - OperationCode::Select, comp_minus_b, - SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), - std::move(new_c)); - - return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); - } - default: - UNREACHABLE(); - return Immediate(0); - } - }(); - - SetTemporary(bb, 1, op_c); - op_c = GetTemporary(1); - - // TODO(Rodrigo): Use an appropiate sign for this operation - Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); - SetTemporary(bb, 2, sum); - sum = GetTemporary(2); - if (is_merge) { - const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), - Immediate(0), Immediate(16)); - const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, - Immediate(16)); - sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); - } - - SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); - SetRegister(bb, instr.gpr0, std::move(sum)); - - return pc; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp deleted file mode 100644 index 2647865d4..000000000 --- a/src/video_core/shader/expr.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "video_core/shader/expr.h" - -namespace VideoCommon::Shader { -namespace { -bool ExprIsBoolean(const Expr& expr) { - return std::holds_alternative(*expr); -} - -bool ExprBooleanGet(const Expr& expr) { - return std::get_if(expr.get())->value; -} -} // Anonymous namespace - -bool ExprAnd::operator==(const ExprAnd& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprAnd::operator!=(const ExprAnd& b) const { - return !operator==(b); -} - -bool ExprOr::operator==(const ExprOr& b) const { - return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); -} - -bool ExprOr::operator!=(const ExprOr& b) const { - return !operator==(b); -} - -bool ExprNot::operator==(const ExprNot& b) const { - return *operand1 == *b.operand1; -} - -bool ExprNot::operator!=(const ExprNot& b) const { - return !operator==(b); -} - -Expr MakeExprNot(Expr first) { - if (std::holds_alternative(*first)) { - return std::get_if(first.get())->operand1; - } - return MakeExpr(std::move(first)); -} - -Expr MakeExprAnd(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? second : first; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? first : second; - } - return MakeExpr(std::move(first), std::move(second)); -} - -Expr MakeExprOr(Expr first, Expr second) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first) ? first : second; - } - if (ExprIsBoolean(second)) { - return ExprBooleanGet(second) ? second : first; - } - return MakeExpr(std::move(first), std::move(second)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second) { - return (*first) == (*second); -} - -bool ExprAreOpposite(const Expr& first, const Expr& second) { - if (std::holds_alternative(*first)) { - return ExprAreEqual(std::get_if(first.get())->operand1, second); - } - if (std::holds_alternative(*second)) { - return ExprAreEqual(std::get_if(second.get())->operand1, first); - } - return false; -} - -bool ExprIsTrue(const Expr& first) { - if (ExprIsBoolean(first)) { - return ExprBooleanGet(first); - } - return false; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h deleted file mode 100644 index cda284c72..000000000 --- a/src/video_core/shader/expr.h +++ /dev/null @@ -1,156 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::ConditionCode; -using Tegra::Shader::Pred; - -class ExprAnd; -class ExprBoolean; -class ExprCondCode; -class ExprGprEqual; -class ExprNot; -class ExprOr; -class ExprPredicate; -class ExprVar; - -using ExprData = std::variant; -using Expr = std::shared_ptr; - -class ExprAnd final { -public: - explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprAnd& b) const; - bool operator!=(const ExprAnd& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprOr final { -public: - explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} - - bool operator==(const ExprOr& b) const; - bool operator!=(const ExprOr& b) const; - - Expr operand1; - Expr operand2; -}; - -class ExprNot final { -public: - explicit ExprNot(Expr a) : operand1{std::move(a)} {} - - bool operator==(const ExprNot& b) const; - bool operator!=(const ExprNot& b) const; - - Expr operand1; -}; - -class ExprVar final { -public: - explicit ExprVar(u32 index) : var_index{index} {} - - bool operator==(const ExprVar& b) const { - return var_index == b.var_index; - } - - bool operator!=(const ExprVar& b) const { - return !operator==(b); - } - - u32 var_index; -}; - -class ExprPredicate final { -public: - explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} - - bool operator==(const ExprPredicate& b) const { - return predicate == b.predicate; - } - - bool operator!=(const ExprPredicate& b) const { - return !operator==(b); - } - - u32 predicate; -}; - -class ExprCondCode final { -public: - explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} - - bool operator==(const ExprCondCode& b) const { - return cc == b.cc; - } - - bool operator!=(const ExprCondCode& b) const { - return !operator==(b); - } - - ConditionCode cc; -}; - -class ExprBoolean final { -public: - explicit ExprBoolean(bool val) : value{val} {} - - bool operator==(const ExprBoolean& b) const { - return value == b.value; - } - - bool operator!=(const ExprBoolean& b) const { - return !operator==(b); - } - - bool value; -}; - -class ExprGprEqual final { -public: - explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} - - bool operator==(const ExprGprEqual& b) const { - return gpr == b.gpr && value == b.value; - } - - bool operator!=(const ExprGprEqual& b) const { - return !operator==(b); - } - - u32 gpr; - u32 value; -}; - -template -Expr MakeExpr(Args&&... args) { - static_assert(std::is_convertible_v); - return std::make_shared(T(std::forward(args)...)); -} - -bool ExprAreEqual(const Expr& first, const Expr& second); - -bool ExprAreOpposite(const Expr& first, const Expr& second); - -Expr MakeExprNot(Expr first); - -Expr MakeExprAnd(Expr first, Expr second); - -Expr MakeExprOr(Expr first, Expr second); - -bool ExprIsTrue(const Expr& first); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp deleted file mode 100644 index e18ccba8e..000000000 --- a/src/video_core/shader/memory_util.cpp +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include - -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { - const auto& shader_config{maxwell3d.regs.shader_config[static_cast(program)]}; - return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; -} - -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { - // Sched instructions appear once every 4 instructions. - constexpr std::size_t SchedPeriod = 4; - const std::size_t absolute_offset = offset - main_offset; - return (absolute_offset % SchedPeriod) == 0; -} - -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { - // This is the encoded version of BRA that jumps to itself. All Nvidia - // shaders end with one. - static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; - static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; - - const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - std::size_t offset = start_offset; - while (offset < program.size()) { - const u64 instruction = program[offset]; - if (!IsSchedInstruction(offset, start_offset)) { - if ((instruction & MASK) == SELF_JUMPING_BRANCH) { - // End on Maxwell's "nop" instruction - break; - } - if (instruction == 0) { - break; - } - } - ++offset; - } - // The last instruction is included in the program size - return std::min(offset + 1, program.size()); -} - -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute) { - ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); - ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); - memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); - code.resize(CalculateProgramSize(code, is_compute)); - return code; -} - -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b) { - size_t unique_identifier = boost::hash_value(code); - if (is_a) { - // VertexA programs include two programs - boost::hash_combine(unique_identifier, boost::hash_value(code_b)); - } - return static_cast(unique_identifier); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h deleted file mode 100644 index 4624d38e6..000000000 --- a/src/video_core/shader/memory_util.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" - -namespace Tegra { -class MemoryManager; -} - -namespace VideoCommon::Shader { - -using ProgramCode = std::vector; - -constexpr u32 STAGE_MAIN_OFFSET = 10; -constexpr u32 KERNEL_MAIN_OFFSET = 0; - -/// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); - -/// Gets if the current instruction offset is a scheduler instruction -bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); - -/// Calculates the size of a program stream -std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); - -/// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, - const u8* host_ptr, bool is_compute); - -/// Hashes one (or two) program streams -u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b = {}); - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h deleted file mode 100644 index b54d33763..000000000 --- a/src/video_core/shader/node.h +++ /dev/null @@ -1,701 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" - -namespace VideoCommon::Shader { - -enum class OperationCode { - Assign, /// (float& dest, float src) -> void - - Select, /// (MetaArithmetic, bool pred, float a, float b) -> float - - FAdd, /// (MetaArithmetic, float a, float b) -> float - FMul, /// (MetaArithmetic, float a, float b) -> float - FDiv, /// (MetaArithmetic, float a, float b) -> float - FFma, /// (MetaArithmetic, float a, float b, float c) -> float - FNegate, /// (MetaArithmetic, float a) -> float - FAbsolute, /// (MetaArithmetic, float a) -> float - FClamp, /// (MetaArithmetic, float value, float min, float max) -> float - FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float - FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float - FMin, /// (MetaArithmetic, float a, float b) -> float - FMax, /// (MetaArithmetic, float a, float b) -> float - FCos, /// (MetaArithmetic, float a) -> float - FSin, /// (MetaArithmetic, float a) -> float - FExp2, /// (MetaArithmetic, float a) -> float - FLog2, /// (MetaArithmetic, float a) -> float - FInverseSqrt, /// (MetaArithmetic, float a) -> float - FSqrt, /// (MetaArithmetic, float a) -> float - FRoundEven, /// (MetaArithmetic, float a) -> float - FFloor, /// (MetaArithmetic, float a) -> float - FCeil, /// (MetaArithmetic, float a) -> float - FTrunc, /// (MetaArithmetic, float a) -> float - FCastInteger, /// (MetaArithmetic, int a) -> float - FCastUInteger, /// (MetaArithmetic, uint a) -> float - FSwizzleAdd, /// (float a, float b, uint mask) -> float - - IAdd, /// (MetaArithmetic, int a, int b) -> int - IMul, /// (MetaArithmetic, int a, int b) -> int - IDiv, /// (MetaArithmetic, int a, int b) -> int - INegate, /// (MetaArithmetic, int a) -> int - IAbsolute, /// (MetaArithmetic, int a) -> int - IMin, /// (MetaArithmetic, int a, int b) -> int - IMax, /// (MetaArithmetic, int a, int b) -> int - ICastFloat, /// (MetaArithmetic, float a) -> int - ICastUnsigned, /// (MetaArithmetic, uint a) -> int - ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int - ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int - IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int - IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int - IBitwiseNot, /// (MetaArithmetic, int a) -> int - IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int - IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int - IBitCount, /// (MetaArithmetic, int) -> int - IBitMSB, /// (MetaArithmetic, int) -> int - - UAdd, /// (MetaArithmetic, uint a, uint b) -> uint - UMul, /// (MetaArithmetic, uint a, uint b) -> uint - UDiv, /// (MetaArithmetic, uint a, uint b) -> uint - UMin, /// (MetaArithmetic, uint a, uint b) -> uint - UMax, /// (MetaArithmetic, uint a, uint b) -> uint - UCastFloat, /// (MetaArithmetic, float a) -> uint - UCastSigned, /// (MetaArithmetic, int a) -> uint - ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint - ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseNot, /// (MetaArithmetic, uint a) -> uint - UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint - UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint - UBitCount, /// (MetaArithmetic, uint) -> uint - UBitMSB, /// (MetaArithmetic, uint) -> uint - - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 - - LogicalAssign, /// (bool& dst, bool src) -> void - LogicalAnd, /// (bool a, bool b) -> bool - LogicalOr, /// (bool a, bool b) -> bool - LogicalXor, /// (bool a, bool b) -> bool - LogicalNegate, /// (bool a) -> bool - LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAnd2, /// (bool2 a) -> bool - - LogicalFOrdLessThan, /// (float a, float b) -> bool - LogicalFOrdEqual, /// (float a, float b) -> bool - LogicalFOrdLessEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterThan, /// (float a, float b) -> bool - LogicalFOrdNotEqual, /// (float a, float b) -> bool - LogicalFOrdGreaterEqual, /// (float a, float b) -> bool - LogicalFOrdered, /// (float a, float b) -> bool - LogicalFUnordered, /// (float a, float b) -> bool - LogicalFUnordLessThan, /// (float a, float b) -> bool - LogicalFUnordEqual, /// (float a, float b) -> bool - LogicalFUnordLessEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterThan, /// (float a, float b) -> bool - LogicalFUnordNotEqual, /// (float a, float b) -> bool - LogicalFUnordGreaterEqual, /// (float a, float b) -> bool - - LogicalILessThan, /// (int a, int b) -> bool - LogicalIEqual, /// (int a, int b) -> bool - LogicalILessEqual, /// (int a, int b) -> bool - LogicalIGreaterThan, /// (int a, int b) -> bool - LogicalINotEqual, /// (int a, int b) -> bool - LogicalIGreaterEqual, /// (int a, int b) -> bool - - LogicalULessThan, /// (uint a, uint b) -> bool - LogicalUEqual, /// (uint a, uint b) -> bool - LogicalULessEqual, /// (uint a, uint b) -> bool - LogicalUGreaterThan, /// (uint a, uint b) -> bool - LogicalUNotEqual, /// (uint a, uint b) -> bool - LogicalUGreaterEqual, /// (uint a, uint b) -> bool - - LogicalAddCarry, /// (uint a, uint b) -> bool - - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - - Texture, /// (MetaTexture, float[N] coords) -> float4 - TextureLod, /// (MetaTexture, float[N] coords) -> float4 - TextureGather, /// (MetaTexture, float[N] coords) -> float4 - TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - TexelFetch, /// (MetaTexture, int[N], int) -> float4 - TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4 - - ImageLoad, /// (MetaImage, int[N] coords) -> void - ImageStore, /// (MetaImage, int[N] coords) -> void - - AtomicImageAdd, /// (MetaImage, int[N] coords) -> void - AtomicImageAnd, /// (MetaImage, int[N] coords) -> void - AtomicImageOr, /// (MetaImage, int[N] coords) -> void - AtomicImageXor, /// (MetaImage, int[N] coords) -> void - AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - - AtomicUExchange, /// (memory, uint) -> uint - AtomicUAdd, /// (memory, uint) -> uint - AtomicUMin, /// (memory, uint) -> uint - AtomicUMax, /// (memory, uint) -> uint - AtomicUAnd, /// (memory, uint) -> uint - AtomicUOr, /// (memory, uint) -> uint - AtomicUXor, /// (memory, uint) -> uint - - AtomicIExchange, /// (memory, int) -> int - AtomicIAdd, /// (memory, int) -> int - AtomicIMin, /// (memory, int) -> int - AtomicIMax, /// (memory, int) -> int - AtomicIAnd, /// (memory, int) -> int - AtomicIOr, /// (memory, int) -> int - AtomicIXor, /// (memory, int) -> int - - ReduceUAdd, /// (memory, uint) -> void - ReduceUMin, /// (memory, uint) -> void - ReduceUMax, /// (memory, uint) -> void - ReduceUAnd, /// (memory, uint) -> void - ReduceUOr, /// (memory, uint) -> void - ReduceUXor, /// (memory, uint) -> void - - ReduceIAdd, /// (memory, int) -> void - ReduceIMin, /// (memory, int) -> void - ReduceIMax, /// (memory, int) -> void - ReduceIAnd, /// (memory, int) -> void - ReduceIOr, /// (memory, int) -> void - ReduceIXor, /// (memory, int) -> void - - Branch, /// (uint branch_target) -> void - BranchIndirect, /// (uint branch_target) -> void - PushFlowStack, /// (uint branch_target) -> void - PopFlowStack, /// () -> void - Exit, /// () -> void - Discard, /// () -> void - - EmitVertex, /// () -> void - EndPrimitive, /// () -> void - - InvocationId, /// () -> int - YNegate, /// () -> float - LocalInvocationIdX, /// () -> uint - LocalInvocationIdY, /// () -> uint - LocalInvocationIdZ, /// () -> uint - WorkGroupIdX, /// () -> uint - WorkGroupIdY, /// () -> uint - WorkGroupIdZ, /// () -> uint - - BallotThread, /// (bool) -> uint - VoteAll, /// (bool) -> bool - VoteAny, /// (bool) -> bool - VoteEqual, /// (bool) -> bool - - ThreadId, /// () -> uint - ThreadEqMask, /// () -> uint - ThreadGeMask, /// () -> uint - ThreadGtMask, /// () -> uint - ThreadLeMask, /// () -> uint - ThreadLtMask, /// () -> uint - ShuffleIndexed, /// (uint value, uint index) -> uint - - Barrier, /// () -> void - MemoryBarrierGroup, /// () -> void - MemoryBarrierGlobal, /// () -> void - - Amount, -}; - -enum class InternalFlag { - Zero = 0, - Sign = 1, - Carry = 2, - Overflow = 3, - Amount = 4, -}; - -enum class MetaStackClass { - Ssy, - Pbk, -}; - -class OperationNode; -class ConditionalNode; -class GprNode; -class CustomVarNode; -class ImmediateNode; -class InternalFlagNode; -class PredicateNode; -class AbufNode; -class CbufNode; -class LmemNode; -class PatchNode; -class SmemNode; -class GmemNode; -class CommentNode; - -using NodeData = std::variant; -using Node = std::shared_ptr; -using Node4 = std::array; -using NodeBlock = std::vector; - -struct ArraySamplerNode; -struct BindlessSamplerNode; -struct SeparateSamplerNode; - -using TrackSamplerData = std::variant; -using TrackSampler = std::shared_ptr; - -struct SamplerEntry { - /// Bound samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, - bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, - is_buffer{is_buffer_}, is_indexed{is_indexed_} {} - - /// Separate sampler constructor - explicit SamplerEntry(u32 index_, std::pair offsets, std::pair buffers, - Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, - bool is_buffer_) - : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, - buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} - - /// Bindless samplers constructor - explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, - bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, - is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { - } - - u32 index = 0; ///< Emulated index given for the this sampler. - u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. - u32 secondary_offset = 0; ///< Secondary offset in the const buffer. - u32 buffer = 0; ///< Buffer where the bindless sampler is read. - u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. - u32 size = 1; ///< Size of the sampler. - - Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. - bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. - bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. - bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. - bool is_separated = false; ///< Whether the image and sampler is separated or not. -}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct ArraySamplerNode { - u32 index; - u32 base_offset; - u32 bindless_var; -}; - -/// Represents a tracked separate sampler image pair that was folded statically -struct SeparateSamplerNode { - std::pair indices; - std::pair offsets; -}; - -/// Represents a tracked bindless sampler into a direct const buffer -struct BindlessSamplerNode { - u32 index; - u32 offset; -}; - -struct ImageEntry { -public: - /// Bound images constructor - explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, type{type_} {} - - /// Bindless samplers constructor - explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) - : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} - - void MarkWrite() { - is_written = true; - } - - void MarkRead() { - is_read = true; - } - - void MarkAtomic() { - MarkWrite(); - MarkRead(); - is_atomic = true; - } - - u32 index = 0; - u32 offset = 0; - u32 buffer = 0; - - Tegra::Shader::ImageType type{}; - bool is_bindless = false; - bool is_written = false; - bool is_read = false; - bool is_atomic = false; -}; - -struct GlobalMemoryBase { - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - - [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { - return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); - } -}; - -/// Parameters describing an arithmetic operation -struct MetaArithmetic { - bool precise{}; ///< Whether the operation can be constraint or not -}; - -/// Parameters describing a texture sampler -struct MetaTexture { - SamplerEntry sampler; - Node array; - Node depth_compare; - std::vector aoffi; - std::vector ptp; - std::vector derivates; - Node bias; - Node lod; - Node component; - u32 element{}; - Node index; -}; - -struct MetaImage { - const ImageEntry& image; - std::vector values; - u32 element{}; -}; - -/// Parameters that modify an operation but are not part of any particular operand -using Meta = - std::variant; - -class AmendNode { -public: - [[nodiscard]] std::optional GetAmendIndex() const { - if (amend_index == amend_null_index) { - return std::nullopt; - } - return {amend_index}; - } - - void SetAmendIndex(std::size_t index) { - amend_index = index; - } - - void ClearAmend() { - amend_index = amend_null_index; - } - -private: - static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; - std::size_t amend_index{amend_null_index}; -}; - -/// Holds any kind of operation that can be done in the IR -class OperationNode final : public AmendNode { -public: - explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} - - explicit OperationNode(OperationCode code_, Meta meta_) - : OperationNode(code_, std::move(meta_), std::vector{}) {} - - explicit OperationNode(OperationCode code_, std::vector operands_) - : OperationNode(code_, Meta{}, std::move(operands_)) {} - - explicit OperationNode(OperationCode code_, Meta meta_, std::vector operands_) - : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} - - template - explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_) - : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} - - [[nodiscard]] OperationCode GetCode() const { - return code; - } - - [[nodiscard]] const Meta& GetMeta() const { - return meta; - } - - [[nodiscard]] std::size_t GetOperandsCount() const { - return operands.size(); - } - - [[nodiscard]] const Node& operator[](std::size_t operand_index) const { - return operands.at(operand_index); - } - -private: - OperationCode code{}; - Meta meta{}; - std::vector operands; -}; - -/// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final : public AmendNode { -public: - explicit ConditionalNode(Node condition_, std::vector&& code_) - : condition{std::move(condition_)}, code{std::move(code_)} {} - - [[nodiscard]] const Node& GetCondition() const { - return condition; - } - - [[nodiscard]] const std::vector& GetCode() const { - return code; - } - -private: - Node condition; ///< Condition to be satisfied - std::vector code; ///< Code to execute -}; - -/// A general purpose register -class GprNode final { -public: - explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return static_cast(index); - } - -private: - Tegra::Shader::Register index{}; -}; - -/// A custom variable -class CustomVarNode final { -public: - explicit constexpr CustomVarNode(u32 index_) : index{index_} {} - - [[nodiscard]] constexpr u32 GetIndex() const { - return index; - } - -private: - u32 index{}; -}; - -/// A 32-bits value that represents an immediate value -class ImmediateNode final { -public: - explicit constexpr ImmediateNode(u32 value_) : value{value_} {} - - [[nodiscard]] constexpr u32 GetValue() const { - return value; - } - -private: - u32 value{}; -}; - -/// One of Maxwell's internal flags -class InternalFlagNode final { -public: - explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} - - [[nodiscard]] constexpr InternalFlag GetFlag() const { - return flag; - } - -private: - InternalFlag flag{}; -}; - -/// A predicate register, it can be negated without additional nodes -class PredicateNode final { -public: - explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) - : index{index_}, negated{negated_} {} - - [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { - return index; - } - - [[nodiscard]] constexpr bool IsNegated() const { - return negated; - } - -private: - Tegra::Shader::Pred index{}; - bool negated{}; -}; - -/// Attribute buffer memory (known as attributes or varyings in GLSL terms) -class AbufNode final { -public: - // Initialize for standard attributes (index is explicit). - explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) - : buffer{std::move(buffer_)}, index{index_}, element{element_} {} - - // Initialize for physical attributes (index is a variable value). - explicit AbufNode(Node physical_address_, Node buffer_ = {}) - : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} - - [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { - return index; - } - - [[nodiscard]] u32 GetElement() const { - return element; - } - - [[nodiscard]] const Node& GetBuffer() const { - return buffer; - } - - [[nodiscard]] bool IsPhysicalBuffer() const { - return static_cast(physical_address); - } - - [[nodiscard]] const Node& GetPhysicalAddress() const { - return physical_address; - } - -private: - Node physical_address; - Node buffer; - Tegra::Shader::Attribute::Index index{}; - u32 element{}; -}; - -/// Patch memory (used to communicate tessellation stages). -class PatchNode final { -public: - explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} - - [[nodiscard]] constexpr u32 GetOffset() const { - return offset; - } - -private: - u32 offset{}; -}; - -/// Constant buffer node, usually mapped to uniform buffers in GLSL -class CbufNode final { -public: - explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} - - [[nodiscard]] u32 GetIndex() const { - return index; - } - - [[nodiscard]] const Node& GetOffset() const { - return offset; - } - -private: - u32 index{}; - Node offset; -}; - -/// Local memory node -class LmemNode final { -public: - explicit LmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Shared memory node -class SmemNode final { -public: - explicit SmemNode(Node address_) : address{std::move(address_)} {} - - [[nodiscard]] const Node& GetAddress() const { - return address; - } - -private: - Node address; -}; - -/// Global memory node -class GmemNode final { -public: - explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) - : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, - descriptor{descriptor_} {} - - [[nodiscard]] const Node& GetRealAddress() const { - return real_address; - } - - [[nodiscard]] const Node& GetBaseAddress() const { - return base_address; - } - - [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { - return descriptor; - } - -private: - Node real_address; - Node base_address; - GlobalMemoryBase descriptor; -}; - -/// Commentary, can be dropped -class CommentNode final { -public: - explicit CommentNode(std::string text_) : text{std::move(text_)} {} - - [[nodiscard]] const std::string& GetText() const { - return text; - } - -private: - std::string text; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp deleted file mode 100644 index 6a5b6940d..000000000 --- a/src/video_core/shader/node_helper.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/common_types.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -Node Conditional(Node condition, std::vector code) { - return MakeNode(std::move(condition), std::move(code)); -} - -Node Comment(std::string text) { - return MakeNode(std::move(text)); -} - -Node Immediate(u32 value) { - return MakeNode(value); -} - -Node Immediate(s32 value) { - return Immediate(static_cast(value)); -} - -Node Immediate(f32 value) { - u32 integral; - std::memcpy(&integral, &value, sizeof(u32)); - return Immediate(integral); -} - -OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { - if (is_signed) { - return operation_code; - } - switch (operation_code) { - case OperationCode::FCastInteger: - return OperationCode::FCastUInteger; - case OperationCode::IAdd: - return OperationCode::UAdd; - case OperationCode::IMul: - return OperationCode::UMul; - case OperationCode::IDiv: - return OperationCode::UDiv; - case OperationCode::IMin: - return OperationCode::UMin; - case OperationCode::IMax: - return OperationCode::UMax; - case OperationCode::ICastFloat: - return OperationCode::UCastFloat; - case OperationCode::ICastUnsigned: - return OperationCode::UCastSigned; - case OperationCode::ILogicalShiftLeft: - return OperationCode::ULogicalShiftLeft; - case OperationCode::ILogicalShiftRight: - return OperationCode::ULogicalShiftRight; - case OperationCode::IArithmeticShiftRight: - return OperationCode::UArithmeticShiftRight; - case OperationCode::IBitwiseAnd: - return OperationCode::UBitwiseAnd; - case OperationCode::IBitwiseOr: - return OperationCode::UBitwiseOr; - case OperationCode::IBitwiseXor: - return OperationCode::UBitwiseXor; - case OperationCode::IBitwiseNot: - return OperationCode::UBitwiseNot; - case OperationCode::IBitfieldExtract: - return OperationCode::UBitfieldExtract; - case OperationCode::IBitfieldInsert: - return OperationCode::UBitfieldInsert; - case OperationCode::IBitCount: - return OperationCode::UBitCount; - case OperationCode::LogicalILessThan: - return OperationCode::LogicalULessThan; - case OperationCode::LogicalIEqual: - return OperationCode::LogicalUEqual; - case OperationCode::LogicalILessEqual: - return OperationCode::LogicalULessEqual; - case OperationCode::LogicalIGreaterThan: - return OperationCode::LogicalUGreaterThan; - case OperationCode::LogicalINotEqual: - return OperationCode::LogicalUNotEqual; - case OperationCode::LogicalIGreaterEqual: - return OperationCode::LogicalUGreaterEqual; - case OperationCode::AtomicIExchange: - return OperationCode::AtomicUExchange; - case OperationCode::AtomicIAdd: - return OperationCode::AtomicUAdd; - case OperationCode::AtomicIMin: - return OperationCode::AtomicUMin; - case OperationCode::AtomicIMax: - return OperationCode::AtomicUMax; - case OperationCode::AtomicIAnd: - return OperationCode::AtomicUAnd; - case OperationCode::AtomicIOr: - return OperationCode::AtomicUOr; - case OperationCode::AtomicIXor: - return OperationCode::AtomicUXor; - case OperationCode::INegate: - UNREACHABLE_MSG("Can't negate an unsigned integer"); - return {}; - case OperationCode::IAbsolute: - UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); - return {}; - default: - UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); - return {}; - } -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h deleted file mode 100644 index 1e0886185..000000000 --- a/src/video_core/shader/node_helper.h +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/shader/node.h" - -namespace VideoCommon::Shader { - -/// This arithmetic operation cannot be constraint -inline constexpr MetaArithmetic PRECISE = {true}; -/// This arithmetic operation can be optimized away -inline constexpr MetaArithmetic NO_PRECISE = {false}; - -/// Creates a conditional node -Node Conditional(Node condition, std::vector code); - -/// Creates a commentary node -Node Comment(std::string text); - -/// Creates an u32 immediate -Node Immediate(u32 value); - -/// Creates a s32 immediate -Node Immediate(s32 value); - -/// Creates a f32 immediate -Node Immediate(f32 value); - -/// Converts an signed operation code to an unsigned operation code -OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); - -template -Node MakeNode(Args&&... args) { - static_assert(std::is_convertible_v); - return std::make_shared(T(std::forward(args)...)); -} - -template -TrackSampler MakeTrackSampler(Args&&... args) { - static_assert(std::is_convertible_v); - return std::make_shared(T{std::forward(args)...}); -} - -template -Node Operation(OperationCode code, Args&&... args) { - if constexpr (sizeof...(args) == 0) { - return MakeNode(code); - } else if constexpr (std::is_convertible_v>, - Meta>) { - return MakeNode(code, std::forward(args)...); - } else { - return MakeNode(code, Meta{}, std::forward(args)...); - } -} - -template -Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) { - return Operation(SignedToUnsignedCode(code, is_signed), std::forward(args)...); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp deleted file mode 100644 index 148d91fcb..000000000 --- a/src/video_core/shader/registry.cpp +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/kepler_compute.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -using Tegra::Engines::ConstBufferEngineInterface; -using Tegra::Engines::SamplerDescriptor; -using Tegra::Engines::ShaderType; - -namespace { - -GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage == ShaderType::Compute) { - return {}; - } - - auto& graphics = dynamic_cast(engine); - - return { - .tfb_layouts = graphics.regs.tfb_layouts, - .tfb_varying_locs = graphics.regs.tfb_varying_locs, - .primitive_topology = graphics.regs.draw.topology, - .tessellation_primitive = graphics.regs.tess_mode.prim, - .tessellation_spacing = graphics.regs.tess_mode.spacing, - .tfb_enabled = graphics.regs.tfb_enabled != 0, - .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, - }; -} - -ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { - if (shader_stage != ShaderType::Compute) { - return {}; - } - - auto& compute = dynamic_cast(engine); - const auto& launch = compute.launch_description; - - return { - .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, - .shared_memory_size_in_words = launch.shared_alloc, - .local_memory_size_in_words = launch.local_pos_alloc, - }; -} - -} // Anonymous namespace - -Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) - : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, - bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} - -Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) - : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, - graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( - shader_stage, engine_)} {} - -Registry::~Registry() = default; - -std::optional Registry::ObtainKey(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); - keys.emplace(key, value); - return value; -} - -std::optional Registry::ObtainBoundSampler(u32 offset) { - const u32 key = offset; - const auto iter = bound_samplers.find(key); - if (iter != bound_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); - bound_samplers.emplace(key, value); - return value; -} - -std::optional Registry::ObtainSeparateSampler( - std::pair buffers, std::pair offsets) { - SeparateSamplerKey key; - key.buffers = buffers; - key.offsets = offsets; - const auto iter = separate_samplers.find(key); - if (iter != separate_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - - const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); - const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); - const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); - separate_samplers.emplace(key, value); - return value; -} - -std::optional Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = bindless_samplers.find(key); - if (iter != bindless_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); - bindless_samplers.emplace(key, value); - return value; -} - -void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { - keys.insert_or_assign({buffer, offset}, value); -} - -void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { - bound_samplers.insert_or_assign(offset, sampler); -} - -void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { - bindless_samplers.insert_or_assign({buffer, offset}, sampler); -} - -bool Registry::IsConsistent() const { - if (!engine) { - return true; - } - return std::all_of(keys.begin(), keys.end(), - [this](const auto& pair) { - const auto [cbuf, offset] = pair.first; - const auto value = pair.second; - return value == engine->AccessConstBuffer32(stage, cbuf, offset); - }) && - std::all_of(bound_samplers.begin(), bound_samplers.end(), - [this](const auto& sampler) { - const auto [key, value] = sampler; - return value == engine->AccessBoundSampler(stage, key); - }) && - std::all_of(bindless_samplers.begin(), bindless_samplers.end(), - [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - return value == engine->AccessBindlessSampler(stage, cbuf, offset); - }); -} - -bool Registry::HasEqualKeys(const Registry& rhs) const { - return std::tie(keys, bound_samplers, bindless_samplers) == - std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); -} - -const GraphicsInfo& Registry::GetGraphicsInfo() const { - ASSERT(stage != Tegra::Engines::ShaderType::Compute); - return graphics_info; -} - -const ComputeInfo& Registry::GetComputeInfo() const { - ASSERT(stage == Tegra::Engines::ShaderType::Compute); - return compute_info; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h deleted file mode 100644 index 4bebefdde..000000000 --- a/src/video_core/shader/registry.h +++ /dev/null @@ -1,172 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "common/hash.h" -#include "video_core/engines/const_buffer_engine_interface.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" - -namespace VideoCommon::Shader { - -struct SeparateSamplerKey { - std::pair buffers; - std::pair offsets; -}; - -} // namespace VideoCommon::Shader - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { - return std::hash{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ - key.offsets.second); - } -}; - -template <> -struct equal_to { - bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, - const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { - return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; - } -}; - -} // namespace std - -namespace VideoCommon::Shader { - -using KeyMap = std::unordered_map, u32, Common::PairHash>; -using BoundSamplerMap = std::unordered_map; -using SeparateSamplerMap = - std::unordered_map; -using BindlessSamplerMap = - std::unordered_map, Tegra::Engines::SamplerDescriptor, Common::PairHash>; - -struct GraphicsInfo { - using Maxwell = Tegra::Engines::Maxwell3D::Regs; - - std::array - tfb_layouts{}; - std::array, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; - Maxwell::PrimitiveTopology primitive_topology{}; - Maxwell::TessellationPrimitive tessellation_primitive{}; - Maxwell::TessellationSpacing tessellation_spacing{}; - bool tfb_enabled = false; - bool tessellation_clockwise = false; -}; -static_assert(std::is_trivially_copyable_v && - std::is_standard_layout_v); - -struct ComputeInfo { - std::array workgroup_size{}; - u32 shared_memory_size_in_words = 0; - u32 local_memory_size_in_words = 0; -}; -static_assert(std::is_trivially_copyable_v && std::is_standard_layout_v); - -struct SerializedRegistryInfo { - VideoCore::GuestDriverProfile guest_driver_profile; - u32 bound_buffer = 0; - GraphicsInfo graphics; - ComputeInfo compute; -}; - -/** - * The Registry is a class use to interface the 3D and compute engines with the shader compiler. - * With it, the shader can obtain required data from GPU state and store it for disk shader - * compilation. - */ -class Registry { -public: - explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); - - explicit Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine_); - - ~Registry(); - - /// Retrieves a key from the registry, if it's registered, it will give the registered value, if - /// not it will obtain it from maxwell3d and register it. - std::optional ObtainKey(u32 buffer, u32 offset); - - std::optional ObtainBoundSampler(u32 offset); - - std::optional ObtainSeparateSampler( - std::pair buffers, std::pair offsets); - - std::optional ObtainBindlessSampler(u32 buffer, u32 offset); - - /// Inserts a key. - void InsertKey(u32 buffer, u32 offset, u32 value); - - /// Inserts a bound sampler key. - void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Inserts a bindless sampler key. - void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Checks keys and samplers against engine's current const buffers. - /// Returns true if they are the same value, false otherwise. - bool IsConsistent() const; - - /// Returns true if the keys are equal to the other ones in the registry. - bool HasEqualKeys(const Registry& rhs) const; - - /// Returns graphics information from this shader - const GraphicsInfo& GetGraphicsInfo() const; - - /// Returns compute information from this shader - const ComputeInfo& GetComputeInfo() const; - - /// Gives an getter to the const buffer keys in the database. - const KeyMap& GetKeys() const { - return keys; - } - - /// Gets samplers database. - const BoundSamplerMap& GetBoundSamplers() const { - return bound_samplers; - } - - /// Gets bindless samplers database. - const BindlessSamplerMap& GetBindlessSamplers() const { - return bindless_samplers; - } - - /// Gets bound buffer used on this shader - u32 GetBoundBuffer() const { - return bound_buffer; - } - - /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { - return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; - } - -private: - const Tegra::Engines::ShaderType stage; - VideoCore::GuestDriverProfile stored_guest_driver_profile; - Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; - KeyMap keys; - BoundSamplerMap bound_samplers; - SeparateSamplerMap separate_samplers; - BindlessSamplerMap bindless_samplers; - u32 bound_buffer; - GraphicsInfo graphics_info; - ComputeInfo compute_info; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp deleted file mode 100644 index a4987ffc6..000000000 --- a/src/video_core/shader/shader_ir.cpp +++ /dev/null @@ -1,464 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/logging/log.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -using Tegra::Shader::Attribute; -using Tegra::Shader::Instruction; -using Tegra::Shader::IpaMode; -using Tegra::Shader::Pred; -using Tegra::Shader::PredCondition; -using Tegra::Shader::PredOperation; -using Tegra::Shader::Register; - -ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, - Registry& registry_) - : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ - registry_} { - Decode(); - PostDecode(); -} - -ShaderIR::~ShaderIR() = default; - -Node ShaderIR::GetRegister(Register reg) { - if (reg != Register::ZeroIndex) { - used_registers.insert(static_cast(reg)); - } - return MakeNode(reg); -} - -Node ShaderIR::GetCustomVariable(u32 id) { - return MakeNode(id); -} - -Node ShaderIR::GetImmediate19(Instruction instr) { - return Immediate(instr.alu.GetImm20_19()); -} - -Node ShaderIR::GetImmediate32(Instruction instr) { - return Immediate(instr.alu.GetImm20_32()); -} - -Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { - const auto index = static_cast(index_); - const auto offset = static_cast(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); - - return MakeNode(index, Immediate(offset)); -} - -Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { - const auto index = static_cast(index_); - const auto offset = static_cast(offset_); - - used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); - - Node final_offset = [&] { - // Attempt to inline constant buffer without a variable offset. This is done to allow - // tracking LDC calls. - if (const auto gpr = std::get_if(&*node)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - return Immediate(offset); - } - } - return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); - }(); - return MakeNode(index, std::move(final_offset)); -} - -Node ShaderIR::GetPredicate(u64 pred_, bool negated) { - const auto pred = static_cast(pred_); - if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { - used_predicates.insert(pred); - } - - return MakeNode(pred, negated); -} - -Node ShaderIR::GetPredicate(bool immediate) { - return GetPredicate(static_cast(immediate ? Pred::UnusedIndex : Pred::NeverExecute)); -} - -Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_input_attributes.emplace(index); - return MakeNode(index, static_cast(element), std::move(buffer)); -} - -Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { - uses_physical_attributes = true; - return MakeNode(GetRegister(physical_address), buffer); -} - -Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { - MarkAttributeUsage(index, element); - used_output_attributes.insert(index); - return MakeNode(index, static_cast(element), std::move(buffer)); -} - -Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { - Node node = MakeNode(flag); - if (negated) { - return Operation(OperationCode::LogicalNegate, std::move(node)); - } - return node; -} - -Node ShaderIR::GetLocalMemory(Node address) { - return MakeNode(std::move(address)); -} - -Node ShaderIR::GetSharedMemory(Node address) { - return MakeNode(std::move(address)); -} - -Node ShaderIR::GetTemporary(u32 id) { - return GetRegister(Register::ZeroIndex + 1 + id); -} - -Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { - switch (size) { - case Register::Size::Byte: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(24)); - return value; - case Register::Size::Short: - value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, - std::move(value), Immediate(16)); - return value; - case Register::Size::Word: - // Default - do nothing - return value; - default: - UNREACHABLE_MSG("Unimplemented conversion size: {}", size); - return value; - } -} - -Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { - if (!is_signed) { - // Absolute or negate on an unsigned is pointless - return value; - } - if (absolute) { - value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); - } - return value; -} - -Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { - Node value = Immediate(instr.half_imm.PackImmediates()); - if (!has_negation) { - return value; - } - - Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); - Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); - - return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), - std::move(second_negate)); -} - -Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { - return Operation(OperationCode::HUnpack, type, std::move(value)); -} - -Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { - switch (merge) { - case Tegra::Shader::HalfMerge::H0_H1: - return src; - case Tegra::Shader::HalfMerge::F32: - return Operation(OperationCode::HMergeF32, std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H0: - return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); - case Tegra::Shader::HalfMerge::Mrg_H1: - return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); - } - UNREACHABLE(); - return src; -} - -Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { - if (absolute) { - value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); - } - if (negate) { - value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), - GetPredicate(true)); - } - return value; -} - -Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { - if (!saturate) { - return value; - } - - Node positive_zero = Immediate(std::copysignf(0, 1)); - Node positive_one = Immediate(1.0f); - return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), - std::move(positive_one)); -} - -Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - if (condition == PredCondition::T) { - return GetPredicate(true); - } else if (condition == PredCondition::F) { - return GetPredicate(false); - } - - static constexpr std::array comparison_table{ - OperationCode(0), - OperationCode::LogicalFOrdLessThan, // LT - OperationCode::LogicalFOrdEqual, // EQ - OperationCode::LogicalFOrdLessEqual, // LE - OperationCode::LogicalFOrdGreaterThan, // GT - OperationCode::LogicalFOrdNotEqual, // NE - OperationCode::LogicalFOrdGreaterEqual, // GE - OperationCode::LogicalFOrdered, // NUM - OperationCode::LogicalFUnordered, // NAN - OperationCode::LogicalFUnordLessThan, // LTU - OperationCode::LogicalFUnordEqual, // EQU - OperationCode::LogicalFUnordLessEqual, // LEU - OperationCode::LogicalFUnordGreaterThan, // GTU - OperationCode::LogicalFUnordNotEqual, // NEU - OperationCode::LogicalFUnordGreaterEqual, // GEU - }; - const std::size_t index = static_cast(condition); - ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); - - return Operation(comparison_table[index], op_a, op_b); -} - -Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, - std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, - std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, - std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, - std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, - std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), - std::move(op_b)); -} - -Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, - Node op_b) { - static constexpr std::array comparison_table{ - std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, - std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, - std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual}, - std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, - std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, - std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, - std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, - std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, - std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, - std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, - std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, - }; - - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); -} - -OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - static constexpr std::array operation_table{ - OperationCode::LogicalAnd, - OperationCode::LogicalOr, - OperationCode::LogicalXor, - }; - - const auto index = static_cast(operation); - if (index >= operation_table.size()) { - UNIMPLEMENTED_MSG("Unknown predicate operation."); - return {}; - } - - return operation_table[index]; -} - -Node ShaderIR::GetConditionCode(ConditionCode cc) const { - switch (cc) { - case ConditionCode::NEU: - return GetInternalFlag(InternalFlag::Zero, true); - case ConditionCode::FCSM_TR: - UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); - return MakeNode(Pred::NeverExecute, false); - default: - UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc); - return MakeNode(Pred::NeverExecute, false); - } -} - -void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { - bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); -} - -void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); -} - -void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { - bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); -} - -void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { - bb.push_back( - Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); -} - -void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { - SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); -} - -void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { - if (!sets_cc) { - return; - } - Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); - SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); - LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); -} - -Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), - Immediate(offset), Immediate(bits)); -} - -Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { - return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), - Immediate(bits)); -} - -void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { - switch (index) { - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - break; - case 1: - uses_layer = true; - break; - case 2: - uses_viewport_index = true; - break; - case 3: - uses_point_size = true; - break; - } - break; - case Attribute::Index::TessCoordInstanceIDVertexID: - switch (element) { - case 2: - uses_instance_id = true; - break; - case 3: - uses_vertex_id = true; - break; - } - break; - case Attribute::Index::ClipDistances0123: - case Attribute::Index::ClipDistances4567: { - const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; - used_clip_distances.at(clip_index) = true; - break; - } - case Attribute::Index::FrontColor: - case Attribute::Index::FrontSecondaryColor: - case Attribute::Index::BackColor: - case Attribute::Index::BackSecondaryColor: - uses_legacy_varyings = true; - break; - default: - if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { - uses_legacy_varyings = true; - } - break; - } -} - -std::size_t ShaderIR::DeclareAmend(Node new_amend) { - const auto id = amend_code.size(); - amend_code.push_back(std::move(new_amend)); - return id; -} - -u32 ShaderIR::NewCustomVariable() { - return num_custom_variables++; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h deleted file mode 100644 index 1cd7c14d7..000000000 --- a/src/video_core/shader/shader_ir.h +++ /dev/null @@ -1,479 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_bytecode.h" -#include "video_core/engines/shader_header.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/compiler_settings.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/node.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct ShaderBlock; - -constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; - -struct ConstBuffer { - constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) - : max_offset{max_offset_}, is_indirect{is_indirect_} {} - - constexpr ConstBuffer() = default; - - void MarkAsUsed(u64 offset) { - max_offset = std::max(max_offset, static_cast(offset)); - } - - void MarkAsUsedIndirect() { - is_indirect = true; - } - - bool IsIndirect() const { - return is_indirect; - } - - u32 GetSize() const { - return max_offset + static_cast(sizeof(float)); - } - - u32 GetMaxOffset() const { - return max_offset; - } - -private: - u32 max_offset = 0; - bool is_indirect = false; -}; - -struct GlobalMemoryUsage { - bool is_read{}; - bool is_written{}; -}; - -class ShaderIR final { -public: - explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, - CompilerSettings settings_, Registry& registry_); - ~ShaderIR(); - - const std::map& GetBasicBlocks() const { - return basic_blocks; - } - - const std::set& GetRegisters() const { - return used_registers; - } - - const std::set& GetPredicates() const { - return used_predicates; - } - - const std::set& GetInputAttributes() const { - return used_input_attributes; - } - - const std::set& GetOutputAttributes() const { - return used_output_attributes; - } - - const std::map& GetConstantBuffers() const { - return used_cbufs; - } - - const std::list& GetSamplers() const { - return used_samplers; - } - - const std::list& GetImages() const { - return used_images; - } - - const std::array& GetClipDistances() - const { - return used_clip_distances; - } - - const std::map& GetGlobalMemory() const { - return used_global_memory; - } - - std::size_t GetLength() const { - return static_cast(coverage_end * sizeof(u64)); - } - - bool UsesLayer() const { - return uses_layer; - } - - bool UsesViewportIndex() const { - return uses_viewport_index; - } - - bool UsesPointSize() const { - return uses_point_size; - } - - bool UsesInstanceId() const { - return uses_instance_id; - } - - bool UsesVertexId() const { - return uses_vertex_id; - } - - bool UsesLegacyVaryings() const { - return uses_legacy_varyings; - } - - bool UsesYNegate() const { - return uses_y_negate; - } - - bool UsesWarps() const { - return uses_warps; - } - - bool HasPhysicalAttributes() const { - return uses_physical_attributes; - } - - const Tegra::Shader::Header& GetHeader() const { - return header; - } - - bool IsFlowStackDisabled() const { - return disable_flow_stack; - } - - bool IsDecompiled() const { - return decompiled; - } - - const ASTManager& GetASTManager() const { - return program_manager; - } - - ASTNode GetASTProgram() const { - return program_manager.GetProgram(); - } - - u32 GetASTNumVariables() const { - return program_manager.GetVariables(); - } - - u32 ConvertAddressToNvidiaSpace(u32 address) const { - return (address - main_offset) * static_cast(sizeof(Tegra::Shader::Instruction)); - } - - /// Returns a condition code evaluated from internal flags - Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; - - const Node& GetAmendNode(std::size_t index) const { - return amend_code[index]; - } - - u32 GetNumCustomVariables() const { - return num_custom_variables; - } - -private: - friend class ASTDecoder; - - struct SamplerInfo { - std::optional type; - std::optional is_array; - std::optional is_shadow; - std::optional is_buffer; - - constexpr bool IsComplete() const noexcept { - return type && is_array && is_shadow && is_buffer; - } - }; - - void Decode(); - void PostDecode(); - - NodeBlock DecodeRange(u32 begin, u32 end); - void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); - void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); - - /** - * Decodes a single instruction from Tegra to IR. - * @param bb Basic block where the nodes will be written to. - * @param pc Program counter. Offset to decode. - * @return Next address to decode. - */ - u32 DecodeInstr(NodeBlock& bb, u32 pc); - - u32 DecodeArithmetic(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); - u32 DecodeBfe(NodeBlock& bb, u32 pc); - u32 DecodeBfi(NodeBlock& bb, u32 pc); - u32 DecodeShift(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); - u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); - u32 DecodeFfma(NodeBlock& bb, u32 pc); - u32 DecodeHfma2(NodeBlock& bb, u32 pc); - u32 DecodeConversion(NodeBlock& bb, u32 pc); - u32 DecodeWarp(NodeBlock& bb, u32 pc); - u32 DecodeMemory(NodeBlock& bb, u32 pc); - u32 DecodeTexture(NodeBlock& bb, u32 pc); - u32 DecodeImage(NodeBlock& bb, u32 pc); - u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); - u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); - u32 DecodeFloatSet(NodeBlock& bb, u32 pc); - u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); - u32 DecodeHalfSet(NodeBlock& bb, u32 pc); - u32 DecodeVideo(NodeBlock& bb, u32 pc); - u32 DecodeXmad(NodeBlock& bb, u32 pc); - u32 DecodeOther(NodeBlock& bb, u32 pc); - - /// Generates a node for a passed register. - Node GetRegister(Tegra::Shader::Register reg); - /// Generates a node for a custom variable - Node GetCustomVariable(u32 id); - /// Generates a node representing a 19-bit immediate value - Node GetImmediate19(Tegra::Shader::Instruction instr); - /// Generates a node representing a 32-bit immediate value - Node GetImmediate32(Tegra::Shader::Instruction instr); - /// Generates a node representing a constant buffer - Node GetConstBuffer(u64 index, u64 offset); - /// Generates a node representing a constant buffer with a variadic offset - Node GetConstBufferIndirect(u64 index, u64 offset, Node node); - /// Generates a node for a passed predicate. It can be optionally negated - Node GetPredicate(u64 pred, bool negated = false); - /// Generates a predicate node for an immediate true or false value - Node GetPredicate(bool immediate); - /// Generates a node representing an input attribute. Keeps track of used attributes. - Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); - /// Generates a node representing a physical input attribute. - Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); - /// Generates a node representing an output attribute. Keeps track of used attributes. - Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); - /// Generates a node representing an internal flag - Node GetInternalFlag(InternalFlag flag, bool negated = false) const; - /// Generates a node representing a local memory address - Node GetLocalMemory(Node address); - /// Generates a node representing a shared memory address - Node GetSharedMemory(Node address); - /// Generates a temporary, internally it uses a post-RZ register - Node GetTemporary(u32 id); - - /// Sets a register. src value must be a number-evaluated node. - void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); - /// Sets a predicate. src value must be a bool-evaluated node - void SetPredicate(NodeBlock& bb, u64 dest, Node src); - /// Sets an internal flag. src value must be a bool-evaluated node - void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); - /// Sets a local memory address with a value. - void SetLocalMemory(NodeBlock& bb, Node address, Node value); - /// Sets a shared memory address with a value. - void SetSharedMemory(NodeBlock& bb, Node address, Node value); - /// Sets a temporary. Internally it uses a post-RZ register - void SetTemporary(NodeBlock& bb, u32 id, Node value); - - /// Sets internal flags from a float - void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); - /// Sets internal flags from an integer - void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); - - /// Conditionally absolute/negated float. Absolute is applied first - Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); - /// Conditionally saturates a float - Node GetSaturatedFloat(Node value, bool saturate = true); - - /// Converts an integer to different sizes. - Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); - /// Conditionally absolute/negated integer. Absolute is applied first - Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); - - /// Unpacks a half immediate from an instruction - Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); - /// Unpacks a binary value into a half float pair with a type format - Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); - /// Merges a half pair into another value - Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); - /// Conditionally absolute/negated half float pair. Absolute is applied first - Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); - /// Conditionally saturates a half float pair - Node GetSaturatedHalfFloat(Node value, bool saturate = true); - - /// Get image component value by type and size - std::pair GetComponentValue(Tegra::Texture::ComponentType component_type, - u32 component_size, Node original_value); - - /// Returns a predicate comparing two floats - Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - /// Returns a predicate comparing two integers - Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, - Node op_a, Node op_b); - /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared - Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); - - /// Returns a predicate combiner operation - OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); - - /// Queries the missing sampler info from the execution context. - SamplerInfo GetSamplerInfo(SamplerInfo info, - std::optional sampler); - - /// Accesses a texture sampler. - std::optional GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); - - /// Accesses a texture sampler for a bindless texture. - std::optional GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, - Node& index_var); - - /// Accesses an image. - ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); - - /// Access a bindless image sampler. - ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); - - /// Extracts a sequence of bits from a node - Node BitfieldExtract(Node value, u32 offset, u32 bits); - - /// Inserts a sequence of bits from a node - Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); - - /// Marks the usage of a input or output attribute. - void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); - - /// Decodes VMNMX instruction and inserts its code into the passed basic block. - void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); - - void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components); - - void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components, bool ignore_mask = false); - - Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, bool is_aoffi, - std::optional bindless_reg); - - Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array); - - Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, - bool is_bindless); - - Node4 GetTldCode(Tegra::Shader::Instruction instr); - - Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool is_array); - - std::tuple ValidateAndGetCoordinateElement( - Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, - bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); - - std::vector GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); - - std::vector GetPtpCoordinates(std::array ptp_regs); - - Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, std::vector coords, - Node array, Node depth_compare, u32 bias_offset, std::vector aoffi, - std::optional bindless_reg); - - Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, - u64 byte_height); - - void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, - Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, - Tegra::Shader::PredicateResultMode predicate_mode, - Tegra::Shader::Pred predicate, bool sets_cc); - void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, - Node op_c, Node imm_lut, bool sets_cc); - - std::tuple TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor); - - std::pair HandleBindlessIndirectRead(const CbufNode& cbuf, - const OperationNode& operation, - Node gpr, Node base_offset, - Node tracked, const NodeBlock& code, - s64 cursor); - - std::optional TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; - - std::pair TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const; - - std::tuple TrackGlobalMemory(NodeBlock& bb, - Tegra::Shader::Instruction instr, - bool is_read, bool is_write); - - /// Register new amending code and obtain the reference id. - std::size_t DeclareAmend(Node new_amend); - - u32 NewCustomVariable(); - - const ProgramCode& program_code; - const u32 main_offset; - const CompilerSettings settings; - Registry& registry; - - bool decompiled{}; - bool disable_flow_stack{}; - - u32 coverage_begin{}; - u32 coverage_end{}; - - std::map basic_blocks; - NodeBlock global_code; - ASTManager program_manager{true, true}; - std::vector amend_code; - u32 num_custom_variables{}; - - std::set used_registers; - std::set used_predicates; - std::set used_input_attributes; - std::set used_output_attributes; - std::map used_cbufs; - std::list used_samplers; - std::list used_images; - std::array used_clip_distances{}; - std::map used_global_memory; - bool uses_layer{}; - bool uses_viewport_index{}; - bool uses_point_size{}; - bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes - bool uses_instance_id{}; - bool uses_vertex_id{}; - bool uses_legacy_varyings{}; - bool uses_y_negate{}; - bool uses_warps{}; - bool uses_indexed_samplers{}; - - Tegra::Shader::Header header; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp deleted file mode 100644 index 6be3ea92b..000000000 --- a/src/video_core/shader/track.cpp +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/common_types.h" -#include "video_core/shader/node.h" -#include "video_core/shader/node_helper.h" -#include "video_core/shader/shader_ir.h" - -namespace VideoCommon::Shader { - -namespace { - -std::pair FindOperation(const NodeBlock& code, s64 cursor, - OperationCode operation_code) { - for (; cursor >= 0; --cursor) { - Node node = code.at(cursor); - - if (const auto operation = std::get_if(&*node)) { - if (operation->GetCode() == operation_code) { - return {std::move(node), cursor}; - } - } - - if (const auto conditional = std::get_if(&*node)) { - const auto& conditional_code = conditional->GetCode(); - auto result = FindOperation( - conditional_code, static_cast(conditional_code.size() - 1), operation_code); - auto& found = result.first; - if (found) { - return {std::move(found), cursor}; - } - } - } - return {}; -} - -std::optional> DecoupleIndirectRead(const OperationNode& operation) { - if (operation.GetCode() != OperationCode::UAdd) { - return std::nullopt; - } - Node gpr; - Node offset; - ASSERT(operation.GetOperandsCount() == 2); - for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { - Node operand = operation[i]; - if (std::holds_alternative(*operand)) { - offset = operation[i]; - } else if (std::holds_alternative(*operand)) { - gpr = operation[i]; - } - } - if (offset && gpr) { - return std::make_pair(gpr, offset); - } - return std::nullopt; -} - -bool AmendNodeCv(std::size_t amend_index, Node node) { - if (const auto operation = std::get_if(&*node)) { - operation->SetAmendIndex(amend_index); - return true; - } - if (const auto conditional = std::get_if(&*node)) { - conditional->SetAmendIndex(amend_index); - return true; - } - return false; -} - -} // Anonymous namespace - -std::pair ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor) { - if (const auto cbuf = std::get_if(&*tracked)) { - const u32 cbuf_index = cbuf->GetIndex(); - - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if(&*offset)) { - auto track = MakeTrackSampler(cbuf_index, immediate->GetValue()); - return {tracked, track}; - } - if (const auto operation = std::get_if(&*offset)) { - const u32 bound_buffer = registry.GetBoundBuffer(); - if (bound_buffer != cbuf_index) { - return {}; - } - if (const std::optional pair = DecoupleIndirectRead(*operation)) { - auto [gpr, base_offset] = *pair; - return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, - code, cursor); - } - } - return {}; - } - if (const auto gpr = std::get_if(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackBindlessSampler(source, code, new_cursor); - } - if (const auto operation = std::get_if(&*tracked)) { - const OperationNode& op = *operation; - - const OperationCode opcode = operation->GetCode(); - if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { - ASSERT(op.GetOperandsCount() == 2); - auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); - auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); - if (node_a && node_b) { - auto track = MakeTrackSampler(std::pair{index_a, index_b}, - std::pair{offset_a, offset_b}); - return {tracked, std::move(track)}; - } - } - std::size_t i = op.GetOperandsCount(); - while (i--) { - if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { - // Constant buffer found in operand. - return found; - } - } - return {}; - } - if (const auto conditional = std::get_if(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackBindlessSampler(tracked, conditional_code, - static_cast(conditional_code.size())); - } - return {}; -} - -std::pair ShaderIR::HandleBindlessIndirectRead( - const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, - const NodeBlock& code, s64 cursor) { - const auto offset_imm = std::get(*base_offset); - const auto& gpu_driver = registry.AccessGuestDriverProfile(); - const u32 bindless_cv = NewCustomVariable(); - const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); - Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); - - Node cv_node = GetCustomVariable(bindless_cv); - Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); - const std::size_t amend_index = DeclareAmend(std::move(amend_op)); - AmendNodeCv(amend_index, code[cursor]); - - // TODO: Implement bindless index custom variable - auto track = - MakeTrackSampler(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); - return {tracked, track}; -} - -std::tuple ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, - s64 cursor) const { - if (const auto cbuf = std::get_if(&*tracked)) { - // Constant buffer found, test if it's an immediate - const auto& offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if(&*offset)) { - return {tracked, cbuf->GetIndex(), immediate->GetValue()}; - } - return {}; - } - if (const auto gpr = std::get_if(&*tracked)) { - if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { - return {}; - } - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same - // register that it uses as operand - const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); - if (!source) { - return {}; - } - return TrackCbuf(source, code, new_cursor); - } - if (const auto operation = std::get_if(&*tracked)) { - for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { - if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { - // Cbuf found in operand. - return found; - } - } - return {}; - } - if (const auto conditional = std::get_if(&*tracked)) { - const auto& conditional_code = conditional->GetCode(); - return TrackCbuf(tracked, conditional_code, static_cast(conditional_code.size())); - } - return {}; -} - -std::optional ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { - // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register - // that it uses as operand - const auto result = TrackRegister(&std::get(*tracked), code, cursor - 1); - const auto& found = result.first; - if (!found) { - return std::nullopt; - } - if (const auto immediate = std::get_if(&*found)) { - return immediate->GetValue(); - } - return std::nullopt; -} - -std::pair ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) const { - for (; cursor >= 0; --cursor) { - const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); - if (!found_node) { - return {}; - } - const auto operation = std::get_if(&*found_node); - ASSERT(operation); - - const auto& target = (*operation)[0]; - if (const auto gpr_target = std::get_if(&*target)) { - if (gpr_target->GetIndex() == tracked->GetIndex()) { - return {(*operation)[1], new_cursor}; - } - } - } - return {}; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp deleted file mode 100644 index 22a933761..000000000 --- a/src/video_core/shader/transform_feedback.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/transform_feedback.h" - -namespace VideoCommon::Shader { - -namespace { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 - -/// Attribute offsets that describe a vector -constexpr std::array VECTORS = { - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] -}; -} // namespace - -std::unordered_map BuildTransformFeedback(const GraphicsInfo& info) { - - std::unordered_map tfb; - - for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = info.tfb_varying_locs[buffer]; - const auto& layout = info.tfb_layouts[buffer]; - const std::size_t varying_count = layout.varying_count; - - std::size_t highest = 0; - - for (std::size_t offset = 0; offset < varying_count; ++offset) { - const std::size_t base_offset = offset; - const u8 location = locations[offset]; - - VaryingTFB varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * sizeof(u32); - varying.components = 1; - - if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - - [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; - UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); - - highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); - } - - UNIMPLEMENTED_IF(highest != layout.stride); - } - return tfb; -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h deleted file mode 100644 index 77d05f64c..000000000 --- a/src/video_core/shader/transform_feedback.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "common/common_types.h" -#include "video_core/shader/registry.h" - -namespace VideoCommon::Shader { - -struct VaryingTFB { - std::size_t buffer; - std::size_t stride; - std::size_t offset; - std::size_t components; -}; - -std::unordered_map BuildTransformFeedback(const GraphicsInfo& info); - -} // namespace VideoCommon::Shader -- cgit v1.2.3 From 85cce78583bc2232428a8fb39e43182877c8d5ad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Feb 2021 00:59:28 -0300 Subject: shader: Primitive Vulkan integration --- src/shader_recompiler/CMakeLists.txt | 13 +- .../backend/spirv/emit_context.cpp | 2 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 117 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 419 ++-- .../spirv/emit_spirv_bitwise_conversion.cpp | 24 +- .../backend/spirv/emit_spirv_composite.cpp | 48 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 42 +- .../backend/spirv/emit_spirv_control_flow.cpp | 10 +- .../backend/spirv/emit_spirv_floating_point.cpp | 92 +- .../backend/spirv/emit_spirv_integer.cpp | 60 +- .../backend/spirv/emit_spirv_logical.cpp | 40 +- .../backend/spirv/emit_spirv_memory.cpp | 56 +- .../backend/spirv/emit_spirv_select.cpp | 8 +- .../backend/spirv/emit_spirv_undefined.cpp | 10 +- src/shader_recompiler/environment.h | 6 +- src/shader_recompiler/file_environment.cpp | 6 +- src/shader_recompiler/file_environment.h | 4 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 + src/shader_recompiler/frontend/ir/post_order.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 + .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/move_register.cpp | 35 +- .../maxwell/translate/impl/not_implemented.cpp | 4 - src/shader_recompiler/main.cpp | 2 +- src/shader_recompiler/profile.h | 13 + src/shader_recompiler/recompiler.cpp | 27 + src/shader_recompiler/recompiler.h | 18 + src/video_core/CMakeLists.txt | 6 +- src/video_core/engines/kepler_compute.h | 1 - src/video_core/engines/shader_bytecode.h | 2298 -------------------- src/video_core/engines/shader_header.h | 158 -- .../renderer_vulkan/vk_compute_pipeline.cpp | 140 +- .../renderer_vulkan/vk_compute_pipeline.h | 43 +- .../renderer_vulkan/vk_descriptor_pool.cpp | 6 +- .../renderer_vulkan/vk_descriptor_pool.h | 10 +- src/video_core/renderer_vulkan/vk_pipeline.h | 36 + .../renderer_vulkan/vk_pipeline_cache.cpp | 190 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 23 +- src/video_core/renderer_vulkan/vk_rasterizer.h | 3 - .../renderer_vulkan/vk_resource_pool.cpp | 12 +- src/video_core/renderer_vulkan/vk_resource_pool.h | 12 +- 43 files changed, 1003 insertions(+), 3036 deletions(-) create mode 100644 src/shader_recompiler/profile.h create mode 100644 src/shader_recompiler/recompiler.cpp create mode 100644 src/shader_recompiler/recompiler.h delete mode 100644 src/video_core/engines/shader_bytecode.h delete mode 100644 src/video_core/engines/shader_header.h create mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 84be94a8d..b56bdd3d9 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(shader_recompiler +add_library(shader_recompiler STATIC backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp @@ -85,13 +85,19 @@ add_executable(shader_recompiler ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp ir_opt/verification_pass.cpp - main.cpp object_pool.h + profile.h + recompiler.cpp + recompiler.h shader_info.h ) -target_include_directories(video_core PRIVATE sirit) +target_include_directories(shader_recompiler PRIVATE sirit) target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) +target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit) + +add_executable(shader_util main.cpp) +target_link_libraries(shader_util PRIVATE shader_recompiler) if (MSVC) target_compile_options(shader_recompiler PRIVATE @@ -121,3 +127,4 @@ else() endif() create_target_directory_groups(shader_recompiler) +create_target_directory_groups(shader_util) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 1c985aff8..770067d98 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -115,6 +115,7 @@ void EmitContext::DefineConstantBuffers(const Info& info) { for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("c{}", desc.index)); std::fill_n(cbufs.data() + desc.index, desc.count, id); binding += desc.count; @@ -143,6 +144,7 @@ void EmitContext::DefineStorageBuffers(const Info& info) { for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("ssbo{}", binding)); std::fill_n(ssbos.data() + binding, desc.count, id); binding += desc.count; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 55018332e..d59718435 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -2,8 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include +#include #include +#include +#include #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/basic_block.h" @@ -14,10 +17,10 @@ namespace Shader::Backend::SPIRV { namespace { template -struct FuncTraits : FuncTraits {}; +struct FuncTraits : FuncTraits {}; -template -struct FuncTraits { +template +struct FuncTraits { using ReturnType = ReturnType_; static constexpr size_t NUM_ARGS = sizeof...(Args); @@ -26,15 +29,15 @@ struct FuncTraits { using ArgType = std::tuple_element_t>; }; -template -void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) { +template +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { const Id forward_id{inst->Definition()}; const bool has_forward_id{Sirit::ValidId(forward_id)}; Id current_id{}; if (has_forward_id) { current_id = ctx.ExchangeCurrentId(forward_id); } - const Id new_id{(emit.*method)(ctx, std::forward(args)...)}; + const Id new_id{func(ctx, std::forward(args)...)}; if (has_forward_id) { ctx.ExchangeCurrentId(current_id); } else { @@ -55,42 +58,62 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { } } -template -void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, std::index_sequence) { - using Traits = FuncTraits; +template +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { + using Traits = FuncTraits; if constexpr (std::is_same_v) { if constexpr (is_first_arg_inst) { - SetDefinition(emit, ctx, inst, inst, - Arg>(ctx, inst->Arg(I))...); + SetDefinition(ctx, inst, inst, Arg>(ctx, inst->Arg(I))...); } else { - SetDefinition(emit, ctx, inst, - Arg>(ctx, inst->Arg(I))...); + SetDefinition(ctx, inst, Arg>(ctx, inst->Arg(I))...); } } else { if constexpr (is_first_arg_inst) { - (emit.*method)(ctx, inst, Arg>(ctx, inst->Arg(I))...); + func(ctx, inst, Arg>(ctx, inst->Arg(I))...); } else { - (emit.*method)(ctx, Arg>(ctx, inst->Arg(I))...); + func(ctx, Arg>(ctx, inst->Arg(I))...); } } } -template -void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { - using Traits = FuncTraits; +template +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits; static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); if constexpr (Traits::NUM_ARGS == 1) { - Invoke(emit, ctx, inst, std::make_index_sequence<0>{}); + Invoke(ctx, inst, std::make_index_sequence<0>{}); } else { using FirstArgType = typename Traits::template ArgType<1>; static constexpr bool is_first_arg_inst = std::is_same_v; using Indices = std::make_index_sequence; - Invoke(emit, ctx, inst, Indices{}); + Invoke(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->Opcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->Opcode()); +} + +Id TypeId(const EmitContext& ctx, IR::Type type) { + switch (type) { + case IR::Type::U1: + return ctx.U1; + case IR::Type::U32: + return ctx.U32[1]; + default: + throw NotImplementedException("Phi node type {}", type); } } } // Anonymous namespace -EmitSPIRV::EmitSPIRV(IR::Program& program) { +std::vector EmitSPIRV(Environment& env, IR::Program& program) { EmitContext ctx{program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) @@ -112,43 +135,17 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) { if (program.info.uses_local_invocation_id) { interfaces.push_back(ctx.local_invocation_id); } - const std::span interfaces_span(interfaces.data(), interfaces.size()); - ctx.AddEntryPoint(spv::ExecutionModel::Fragment, func, "main", interfaces_span); - ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); - - std::vector result{ctx.Assemble()}; - std::FILE* file{std::fopen("D:\\shader.spv", "wb")}; - std::fwrite(result.data(), sizeof(u32), result.size(), file); - std::fclose(file); - std::system("spirv-dis D:\\shader.spv") == 0 && - std::system("spirv-val --uniform-buffer-standard-layout D:\\shader.spv") == 0 && - std::system("spirv-cross -V D:\\shader.spv") == 0; -} + ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span); -void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { - switch (inst->Opcode()) { -#define OPCODE(name, result_type, ...) \ - case IR::Opcode::name: \ - return Invoke<&EmitSPIRV::Emit##name>(*this, ctx, inst); -#include "shader_recompiler/frontend/ir/opcodes.inc" -#undef OPCODE - } - throw LogicError("Invalid opcode {}", inst->Opcode()); -} + const std::array workgroup_size{env.WorkgroupSize()}; + ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], + workgroup_size[2]); -static Id TypeId(const EmitContext& ctx, IR::Type type) { - switch (type) { - case IR::Type::U1: - return ctx.U1; - case IR::Type::U32: - return ctx.U32[1]; - default: - throw NotImplementedException("Phi node type {}", type); - } + return ctx.Assemble(); } -Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { +Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { const size_t num_args{inst->NumArgs()}; boost::container::small_vector operands; operands.reserve(num_args * 2); @@ -178,25 +175,25 @@ Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); } -void EmitSPIRV::EmitVoid(EmitContext&) {} +void EmitVoid(EmitContext&) {} -Id EmitSPIRV::EmitIdentity(EmitContext& ctx, const IR::Value& value) { +Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { return ctx.Def(value); } -void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { +void EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetSignFromOp(EmitContext&) { +void EmitGetSignFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetCarryFromOp(EmitContext&) { +void EmitGetCarryFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetOverflowFromOp(EmitContext&) { +void EmitGetOverflowFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 8bde82613..5813f51ff 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -8,223 +8,218 @@ #include "common/common_types.h" #include "shader_recompiler/backend/spirv/emit_context.h" +#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { -class EmitSPIRV { -public: - explicit EmitSPIRV(IR::Program& program); +[[nodiscard]] std::vector EmitSPIRV(Environment& env, IR::Program& program); -private: - void EmitInst(EmitContext& ctx, IR::Inst* inst); - - // Microinstruction emitters - Id EmitPhi(EmitContext& ctx, IR::Inst* inst); - void EmitVoid(EmitContext& ctx); - Id EmitIdentity(EmitContext& ctx, const IR::Value& value); - void EmitBranch(EmitContext& ctx, IR::Block* label); - void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label); - void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); - void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); - void EmitReturn(EmitContext& ctx); - void EmitGetRegister(EmitContext& ctx); - void EmitSetRegister(EmitContext& ctx); - void EmitGetPred(EmitContext& ctx); - void EmitSetPred(EmitContext& ctx); - void EmitSetGotoVariable(EmitContext& ctx); - void EmitGetGotoVariable(EmitContext& ctx); - Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); - void EmitGetAttribute(EmitContext& ctx); - void EmitSetAttribute(EmitContext& ctx); - void EmitGetAttributeIndexed(EmitContext& ctx); - void EmitSetAttributeIndexed(EmitContext& ctx); - void EmitGetZFlag(EmitContext& ctx); - void EmitGetSFlag(EmitContext& ctx); - void EmitGetCFlag(EmitContext& ctx); - void EmitGetOFlag(EmitContext& ctx); - void EmitSetZFlag(EmitContext& ctx); - void EmitSetSFlag(EmitContext& ctx); - void EmitSetCFlag(EmitContext& ctx); - void EmitSetOFlag(EmitContext& ctx); - Id EmitWorkgroupId(EmitContext& ctx); - Id EmitLocalInvocationId(EmitContext& ctx); - Id EmitUndefU1(EmitContext& ctx); - Id EmitUndefU8(EmitContext& ctx); - Id EmitUndefU16(EmitContext& ctx); - Id EmitUndefU32(EmitContext& ctx); - Id EmitUndefU64(EmitContext& ctx); - void EmitLoadGlobalU8(EmitContext& ctx); - void EmitLoadGlobalS8(EmitContext& ctx); - void EmitLoadGlobalU16(EmitContext& ctx); - void EmitLoadGlobalS16(EmitContext& ctx); - void EmitLoadGlobal32(EmitContext& ctx); - void EmitLoadGlobal64(EmitContext& ctx); - void EmitLoadGlobal128(EmitContext& ctx); - void EmitWriteGlobalU8(EmitContext& ctx); - void EmitWriteGlobalS8(EmitContext& ctx); - void EmitWriteGlobalU16(EmitContext& ctx); - void EmitWriteGlobalS16(EmitContext& ctx); - void EmitWriteGlobal32(EmitContext& ctx); - void EmitWriteGlobal64(EmitContext& ctx); - void EmitWriteGlobal128(EmitContext& ctx); - void EmitLoadStorageU8(EmitContext& ctx); - void EmitLoadStorageS8(EmitContext& ctx); - void EmitLoadStorageU16(EmitContext& ctx); - void EmitLoadStorageS16(EmitContext& ctx); - Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); - void EmitLoadStorage64(EmitContext& ctx); - void EmitLoadStorage128(EmitContext& ctx); - void EmitWriteStorageU8(EmitContext& ctx); - void EmitWriteStorageS8(EmitContext& ctx); - void EmitWriteStorageU16(EmitContext& ctx); - void EmitWriteStorageS16(EmitContext& ctx); - void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); - void EmitWriteStorage64(EmitContext& ctx); - void EmitWriteStorage128(EmitContext& ctx); - void EmitCompositeConstructU32x2(EmitContext& ctx); - void EmitCompositeConstructU32x3(EmitContext& ctx); - void EmitCompositeConstructU32x4(EmitContext& ctx); - void EmitCompositeExtractU32x2(EmitContext& ctx); - Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); - void EmitCompositeExtractU32x4(EmitContext& ctx); - void EmitCompositeConstructF16x2(EmitContext& ctx); - void EmitCompositeConstructF16x3(EmitContext& ctx); - void EmitCompositeConstructF16x4(EmitContext& ctx); - void EmitCompositeExtractF16x2(EmitContext& ctx); - void EmitCompositeExtractF16x3(EmitContext& ctx); - void EmitCompositeExtractF16x4(EmitContext& ctx); - void EmitCompositeConstructF32x2(EmitContext& ctx); - void EmitCompositeConstructF32x3(EmitContext& ctx); - void EmitCompositeConstructF32x4(EmitContext& ctx); - void EmitCompositeExtractF32x2(EmitContext& ctx); - void EmitCompositeExtractF32x3(EmitContext& ctx); - void EmitCompositeExtractF32x4(EmitContext& ctx); - void EmitCompositeConstructF64x2(EmitContext& ctx); - void EmitCompositeConstructF64x3(EmitContext& ctx); - void EmitCompositeConstructF64x4(EmitContext& ctx); - void EmitCompositeExtractF64x2(EmitContext& ctx); - void EmitCompositeExtractF64x3(EmitContext& ctx); - void EmitCompositeExtractF64x4(EmitContext& ctx); - void EmitSelect8(EmitContext& ctx); - void EmitSelect16(EmitContext& ctx); - void EmitSelect32(EmitContext& ctx); - void EmitSelect64(EmitContext& ctx); - void EmitBitCastU16F16(EmitContext& ctx); - Id EmitBitCastU32F32(EmitContext& ctx, Id value); - void EmitBitCastU64F64(EmitContext& ctx); - void EmitBitCastF16U16(EmitContext& ctx); - Id EmitBitCastF32U32(EmitContext& ctx, Id value); - void EmitBitCastF64U64(EmitContext& ctx); - void EmitPackUint2x32(EmitContext& ctx); - void EmitUnpackUint2x32(EmitContext& ctx); - void EmitPackFloat2x16(EmitContext& ctx); - void EmitUnpackFloat2x16(EmitContext& ctx); - void EmitPackDouble2x32(EmitContext& ctx); - void EmitUnpackDouble2x32(EmitContext& ctx); - void EmitGetZeroFromOp(EmitContext& ctx); - void EmitGetSignFromOp(EmitContext& ctx); - void EmitGetCarryFromOp(EmitContext& ctx); - void EmitGetOverflowFromOp(EmitContext& ctx); - void EmitFPAbs16(EmitContext& ctx); - void EmitFPAbs32(EmitContext& ctx); - void EmitFPAbs64(EmitContext& ctx); - Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - void EmitFPMax32(EmitContext& ctx); - void EmitFPMax64(EmitContext& ctx); - void EmitFPMin32(EmitContext& ctx); - void EmitFPMin64(EmitContext& ctx); - Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - void EmitFPNeg16(EmitContext& ctx); - void EmitFPNeg32(EmitContext& ctx); - void EmitFPNeg64(EmitContext& ctx); - void EmitFPRecip32(EmitContext& ctx); - void EmitFPRecip64(EmitContext& ctx); - void EmitFPRecipSqrt32(EmitContext& ctx); - void EmitFPRecipSqrt64(EmitContext& ctx); - void EmitFPSqrt(EmitContext& ctx); - void EmitFPSin(EmitContext& ctx); - void EmitFPSinNotReduced(EmitContext& ctx); - void EmitFPExp2(EmitContext& ctx); - void EmitFPExp2NotReduced(EmitContext& ctx); - void EmitFPCos(EmitContext& ctx); - void EmitFPCosNotReduced(EmitContext& ctx); - void EmitFPLog2(EmitContext& ctx); - void EmitFPSaturate16(EmitContext& ctx); - void EmitFPSaturate32(EmitContext& ctx); - void EmitFPSaturate64(EmitContext& ctx); - void EmitFPRoundEven16(EmitContext& ctx); - void EmitFPRoundEven32(EmitContext& ctx); - void EmitFPRoundEven64(EmitContext& ctx); - void EmitFPFloor16(EmitContext& ctx); - void EmitFPFloor32(EmitContext& ctx); - void EmitFPFloor64(EmitContext& ctx); - void EmitFPCeil16(EmitContext& ctx); - void EmitFPCeil32(EmitContext& ctx); - void EmitFPCeil64(EmitContext& ctx); - void EmitFPTrunc16(EmitContext& ctx); - void EmitFPTrunc32(EmitContext& ctx); - void EmitFPTrunc64(EmitContext& ctx); - Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - void EmitIAdd64(EmitContext& ctx); - Id EmitISub32(EmitContext& ctx, Id a, Id b); - void EmitISub64(EmitContext& ctx); - Id EmitIMul32(EmitContext& ctx, Id a, Id b); - void EmitINeg32(EmitContext& ctx); - void EmitIAbs32(EmitContext& ctx); - Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); - void EmitShiftRightLogical32(EmitContext& ctx); - void EmitShiftRightArithmetic32(EmitContext& ctx); - void EmitBitwiseAnd32(EmitContext& ctx); - void EmitBitwiseOr32(EmitContext& ctx); - void EmitBitwiseXor32(EmitContext& ctx); - void EmitBitFieldInsert(EmitContext& ctx); - void EmitBitFieldSExtract(EmitContext& ctx); - Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); - Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); - void EmitULessThan(EmitContext& ctx); - void EmitIEqual(EmitContext& ctx); - void EmitSLessThanEqual(EmitContext& ctx); - void EmitULessThanEqual(EmitContext& ctx); - Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); - void EmitUGreaterThan(EmitContext& ctx); - void EmitINotEqual(EmitContext& ctx); - void EmitSGreaterThanEqual(EmitContext& ctx); - Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); - void EmitLogicalOr(EmitContext& ctx); - void EmitLogicalAnd(EmitContext& ctx); - void EmitLogicalXor(EmitContext& ctx); - void EmitLogicalNot(EmitContext& ctx); - void EmitConvertS16F16(EmitContext& ctx); - void EmitConvertS16F32(EmitContext& ctx); - void EmitConvertS16F64(EmitContext& ctx); - void EmitConvertS32F16(EmitContext& ctx); - void EmitConvertS32F32(EmitContext& ctx); - void EmitConvertS32F64(EmitContext& ctx); - void EmitConvertS64F16(EmitContext& ctx); - void EmitConvertS64F32(EmitContext& ctx); - void EmitConvertS64F64(EmitContext& ctx); - void EmitConvertU16F16(EmitContext& ctx); - void EmitConvertU16F32(EmitContext& ctx); - void EmitConvertU16F64(EmitContext& ctx); - void EmitConvertU32F16(EmitContext& ctx); - void EmitConvertU32F32(EmitContext& ctx); - void EmitConvertU32F64(EmitContext& ctx); - void EmitConvertU64F16(EmitContext& ctx); - void EmitConvertU64F32(EmitContext& ctx); - void EmitConvertU64F64(EmitContext& ctx); - void EmitConvertU64U32(EmitContext& ctx); - void EmitConvertU32U64(EmitContext& ctx); -}; +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitBranch(EmitContext& ctx, IR::Block* label); +void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, + IR::Block* false_label); +void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); +void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); +void EmitReturn(EmitContext& ctx); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx); +void EmitSetAttribute(EmitContext& ctx); +void EmitGetAttributeIndexed(EmitContext& ctx); +void EmitSetAttributeIndexed(EmitContext& ctx); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx); +void EmitLoadGlobal64(EmitContext& ctx); +void EmitLoadGlobal128(EmitContext& ctx); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx); +void EmitWriteGlobal64(EmitContext& ctx); +void EmitWriteGlobal128(EmitContext& ctx); +void EmitLoadStorageU8(EmitContext& ctx); +void EmitLoadStorageS8(EmitContext& ctx); +void EmitLoadStorageU16(EmitContext& ctx); +void EmitLoadStorageS16(EmitContext& ctx); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx); +void EmitLoadStorage128(EmitContext& ctx); +void EmitWriteStorageU8(EmitContext& ctx); +void EmitWriteStorageS8(EmitContext& ctx); +void EmitWriteStorageU16(EmitContext& ctx); +void EmitWriteStorageS16(EmitContext& ctx); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx); +void EmitWriteStorage128(EmitContext& ctx); +void EmitCompositeConstructU32x2(EmitContext& ctx); +void EmitCompositeConstructU32x3(EmitContext& ctx); +void EmitCompositeConstructU32x4(EmitContext& ctx); +void EmitCompositeExtractU32x2(EmitContext& ctx); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx); +void EmitCompositeConstructF16x2(EmitContext& ctx); +void EmitCompositeConstructF16x3(EmitContext& ctx); +void EmitCompositeConstructF16x4(EmitContext& ctx); +void EmitCompositeExtractF16x2(EmitContext& ctx); +void EmitCompositeExtractF16x3(EmitContext& ctx); +void EmitCompositeExtractF16x4(EmitContext& ctx); +void EmitCompositeConstructF32x2(EmitContext& ctx); +void EmitCompositeConstructF32x3(EmitContext& ctx); +void EmitCompositeConstructF32x4(EmitContext& ctx); +void EmitCompositeExtractF32x2(EmitContext& ctx); +void EmitCompositeExtractF32x3(EmitContext& ctx); +void EmitCompositeExtractF32x4(EmitContext& ctx); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitSelect8(EmitContext& ctx); +void EmitSelect16(EmitContext& ctx); +void EmitSelect32(EmitContext& ctx); +void EmitSelect64(EmitContext& ctx); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& ctx); +void EmitPackUint2x32(EmitContext& ctx); +void EmitUnpackUint2x32(EmitContext& ctx); +void EmitPackFloat2x16(EmitContext& ctx); +void EmitUnpackFloat2x16(EmitContext& ctx); +void EmitPackDouble2x32(EmitContext& ctx); +void EmitUnpackDouble2x32(EmitContext& ctx); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx); +void EmitFPAbs32(EmitContext& ctx); +void EmitFPAbs64(EmitContext& ctx); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +void EmitFPMax32(EmitContext& ctx); +void EmitFPMax64(EmitContext& ctx); +void EmitFPMin32(EmitContext& ctx); +void EmitFPMin64(EmitContext& ctx); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +void EmitFPNeg16(EmitContext& ctx); +void EmitFPNeg32(EmitContext& ctx); +void EmitFPNeg64(EmitContext& ctx); +void EmitFPRecip32(EmitContext& ctx); +void EmitFPRecip64(EmitContext& ctx); +void EmitFPRecipSqrt32(EmitContext& ctx); +void EmitFPRecipSqrt64(EmitContext& ctx); +void EmitFPSqrt(EmitContext& ctx); +void EmitFPSin(EmitContext& ctx); +void EmitFPSinNotReduced(EmitContext& ctx); +void EmitFPExp2(EmitContext& ctx); +void EmitFPExp2NotReduced(EmitContext& ctx); +void EmitFPCos(EmitContext& ctx); +void EmitFPCosNotReduced(EmitContext& ctx); +void EmitFPLog2(EmitContext& ctx); +void EmitFPSaturate16(EmitContext& ctx); +void EmitFPSaturate32(EmitContext& ctx); +void EmitFPSaturate64(EmitContext& ctx); +void EmitFPRoundEven16(EmitContext& ctx); +void EmitFPRoundEven32(EmitContext& ctx); +void EmitFPRoundEven64(EmitContext& ctx); +void EmitFPFloor16(EmitContext& ctx); +void EmitFPFloor32(EmitContext& ctx); +void EmitFPFloor64(EmitContext& ctx); +void EmitFPCeil16(EmitContext& ctx); +void EmitFPCeil32(EmitContext& ctx); +void EmitFPCeil64(EmitContext& ctx); +void EmitFPTrunc16(EmitContext& ctx); +void EmitFPTrunc32(EmitContext& ctx); +void EmitFPTrunc64(EmitContext& ctx); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +void EmitIAdd64(EmitContext& ctx); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +void EmitISub64(EmitContext& ctx); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +void EmitINeg32(EmitContext& ctx); +void EmitIAbs32(EmitContext& ctx); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +void EmitShiftRightLogical32(EmitContext& ctx); +void EmitShiftRightArithmetic32(EmitContext& ctx); +void EmitBitwiseAnd32(EmitContext& ctx); +void EmitBitwiseOr32(EmitContext& ctx); +void EmitBitwiseXor32(EmitContext& ctx); +void EmitBitFieldInsert(EmitContext& ctx); +void EmitBitFieldSExtract(EmitContext& ctx); +Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +void EmitULessThan(EmitContext& ctx); +void EmitIEqual(EmitContext& ctx); +void EmitSLessThanEqual(EmitContext& ctx); +void EmitULessThanEqual(EmitContext& ctx); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +void EmitUGreaterThan(EmitContext& ctx); +void EmitINotEqual(EmitContext& ctx); +void EmitSGreaterThanEqual(EmitContext& ctx); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +void EmitLogicalOr(EmitContext& ctx); +void EmitLogicalAnd(EmitContext& ctx); +void EmitLogicalXor(EmitContext& ctx); +void EmitLogicalNot(EmitContext& ctx); +void EmitConvertS16F16(EmitContext& ctx); +void EmitConvertS16F32(EmitContext& ctx); +void EmitConvertS16F64(EmitContext& ctx); +void EmitConvertS32F16(EmitContext& ctx); +void EmitConvertS32F32(EmitContext& ctx); +void EmitConvertS32F64(EmitContext& ctx); +void EmitConvertS64F16(EmitContext& ctx); +void EmitConvertS64F32(EmitContext& ctx); +void EmitConvertS64F64(EmitContext& ctx); +void EmitConvertU16F16(EmitContext& ctx); +void EmitConvertU16F32(EmitContext& ctx); +void EmitConvertU16F64(EmitContext& ctx); +void EmitConvertU32F16(EmitContext& ctx); +void EmitConvertU32F32(EmitContext& ctx); +void EmitConvertU32F64(EmitContext& ctx); +void EmitConvertU64F16(EmitContext& ctx); +void EmitConvertU64F32(EmitContext& ctx); +void EmitConvertU64F64(EmitContext& ctx); +void EmitConvertU64U32(EmitContext& ctx); +void EmitConvertU32U64(EmitContext& ctx); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index af82df99c..49c200498 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -6,51 +6,51 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitBitCastU16F16(EmitContext&) { +void EmitBitCastU16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitCastU32F32(EmitContext& ctx, Id value) { +Id EmitBitCastU32F32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.U32[1], value); } -void EmitSPIRV::EmitBitCastU64F64(EmitContext&) { +void EmitBitCastU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitCastF16U16(EmitContext&) { +void EmitBitCastF16U16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitCastF32U32(EmitContext& ctx, Id value) { +Id EmitBitCastF32U32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.F32[1], value); } -void EmitSPIRV::EmitBitCastF64U64(EmitContext&) { +void EmitBitCastF64U64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackUint2x32(EmitContext&) { +void EmitPackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackUint2x32(EmitContext&) { +void EmitUnpackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackFloat2x16(EmitContext&) { +void EmitPackFloat2x16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackFloat2x16(EmitContext&) { +void EmitUnpackFloat2x16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackDouble2x32(EmitContext&) { +void EmitPackDouble2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackDouble2x32(EmitContext&) { +void EmitUnpackDouble2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index a7374c89d..348e4796d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,99 +6,99 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitCompositeConstructU32x2(EmitContext&) { +void EmitCompositeConstructU32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructU32x3(EmitContext&) { +void EmitCompositeConstructU32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructU32x4(EmitContext&) { +void EmitCompositeConstructU32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractU32x2(EmitContext&) { +void EmitCompositeExtractU32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { return ctx.OpCompositeExtract(ctx.U32[1], vector, index); } -void EmitSPIRV::EmitCompositeExtractU32x4(EmitContext&) { +void EmitCompositeExtractU32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x2(EmitContext&) { +void EmitCompositeConstructF16x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x3(EmitContext&) { +void EmitCompositeConstructF16x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x4(EmitContext&) { +void EmitCompositeConstructF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x2(EmitContext&) { +void EmitCompositeExtractF16x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x3(EmitContext&) { +void EmitCompositeExtractF16x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x4(EmitContext&) { +void EmitCompositeExtractF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x2(EmitContext&) { +void EmitCompositeConstructF32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x3(EmitContext&) { +void EmitCompositeConstructF32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x4(EmitContext&) { +void EmitCompositeConstructF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x2(EmitContext&) { +void EmitCompositeExtractF32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x3(EmitContext&) { +void EmitCompositeExtractF32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x4(EmitContext&) { +void EmitCompositeExtractF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x2(EmitContext&) { +void EmitCompositeConstructF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x3(EmitContext&) { +void EmitCompositeConstructF64x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x4(EmitContext&) { +void EmitCompositeConstructF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x2(EmitContext&) { +void EmitCompositeExtractF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x3(EmitContext&) { +void EmitCompositeExtractF64x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x4(EmitContext&) { +void EmitCompositeExtractF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f4c9970eb..eb9c01c5a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,31 +6,31 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitGetRegister(EmitContext&) { +void EmitGetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetRegister(EmitContext&) { +void EmitSetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetPred(EmitContext&) { +void EmitGetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetPred(EmitContext&) { +void EmitSetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetGotoVariable(EmitContext&) { +void EmitSetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetGotoVariable(EmitContext&) { +void EmitGetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); } @@ -43,59 +43,59 @@ Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR:: return ctx.OpLoad(ctx.U32[1], access_chain); } -void EmitSPIRV::EmitGetAttribute(EmitContext&) { +void EmitGetAttribute(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetAttribute(EmitContext&) { +void EmitSetAttribute(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetAttributeIndexed(EmitContext&) { +void EmitGetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetAttributeIndexed(EmitContext&) { +void EmitSetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetZFlag(EmitContext&) { +void EmitGetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetSFlag(EmitContext&) { +void EmitGetSFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetCFlag(EmitContext&) { +void EmitGetCFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetOFlag(EmitContext&) { +void EmitGetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetZFlag(EmitContext&) { +void EmitSetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetSFlag(EmitContext&) { +void EmitSetSFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetCFlag(EmitContext&) { +void EmitSetCFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetOFlag(EmitContext&) { +void EmitSetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitWorkgroupId(EmitContext& ctx) { +Id EmitWorkgroupId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); } -Id EmitSPIRV::EmitLocalInvocationId(EmitContext& ctx) { +Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 549c1907a..6c4199664 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -6,25 +6,25 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Block* label) { +void EmitBranch(EmitContext& ctx, IR::Block* label) { ctx.OpBranch(label->Definition()); } -void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, +void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, IR::Block* false_label) { ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); } -void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { +void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { ctx.OpLoopMerge(merge_label->Definition(), continue_label->Definition(), spv::LoopControlMask::MaskNone); } -void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { +void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { ctx.OpSelectionMerge(merge_label->Definition(), spv::SelectionControlMask::MaskNone); } -void EmitSPIRV::EmitReturn(EmitContext& ctx) { +void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index c9bc121f8..d24fbb353 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -33,187 +33,187 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { } // Anonymous namespace -void EmitSPIRV::EmitFPAbs16(EmitContext&) { +void EmitFPAbs16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPAbs32(EmitContext&) { +void EmitFPAbs32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPAbs64(EmitContext&) { +void EmitFPAbs64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); } -Id EmitSPIRV::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); } -Id EmitSPIRV::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); } -Id EmitSPIRV::EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); } -Id EmitSPIRV::EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); } -Id EmitSPIRV::EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); } -void EmitSPIRV::EmitFPMax32(EmitContext&) { +void EmitFPMax32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMax64(EmitContext&) { +void EmitFPMax64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMin32(EmitContext&) { +void EmitFPMin32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMin64(EmitContext&) { +void EmitFPMin64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); } -Id EmitSPIRV::EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); } -Id EmitSPIRV::EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } -void EmitSPIRV::EmitFPNeg16(EmitContext&) { +void EmitFPNeg16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPNeg32(EmitContext&) { +void EmitFPNeg32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPNeg64(EmitContext&) { +void EmitFPNeg64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecip32(EmitContext&) { +void EmitFPRecip32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecip64(EmitContext&) { +void EmitFPRecip64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecipSqrt32(EmitContext&) { +void EmitFPRecipSqrt32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecipSqrt64(EmitContext&) { +void EmitFPRecipSqrt64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSqrt(EmitContext&) { +void EmitFPSqrt(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSin(EmitContext&) { +void EmitFPSin(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSinNotReduced(EmitContext&) { +void EmitFPSinNotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPExp2(EmitContext&) { +void EmitFPExp2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPExp2NotReduced(EmitContext&) { +void EmitFPExp2NotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCos(EmitContext&) { +void EmitFPCos(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCosNotReduced(EmitContext&) { +void EmitFPCosNotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPLog2(EmitContext&) { +void EmitFPLog2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate16(EmitContext&) { +void EmitFPSaturate16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate32(EmitContext&) { +void EmitFPSaturate32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate64(EmitContext&) { +void EmitFPSaturate64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven16(EmitContext&) { +void EmitFPRoundEven16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven32(EmitContext&) { +void EmitFPRoundEven32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven64(EmitContext&) { +void EmitFPRoundEven64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor16(EmitContext&) { +void EmitFPFloor16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor32(EmitContext&) { +void EmitFPFloor32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor64(EmitContext&) { +void EmitFPFloor64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil16(EmitContext&) { +void EmitFPCeil16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil32(EmitContext&) { +void EmitFPCeil32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil64(EmitContext&) { +void EmitFPCeil64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc16(EmitContext&) { +void EmitFPTrunc16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc32(EmitContext&) { +void EmitFPTrunc32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc64(EmitContext&) { +void EmitFPTrunc64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 32af94a73..a1d16b81e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -6,126 +6,126 @@ namespace Shader::Backend::SPIRV { -Id EmitSPIRV::EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { if (inst->HasAssociatedPseudoOperation()) { throw NotImplementedException("Pseudo-operations on IAdd32"); } return ctx.OpIAdd(ctx.U32[1], a, b); } -void EmitSPIRV::EmitIAdd64(EmitContext&) { +void EmitIAdd64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitISub32(EmitContext& ctx, Id a, Id b) { +Id EmitISub32(EmitContext& ctx, Id a, Id b) { return ctx.OpISub(ctx.U32[1], a, b); } -void EmitSPIRV::EmitISub64(EmitContext&) { +void EmitISub64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitIMul32(EmitContext& ctx, Id a, Id b) { +Id EmitIMul32(EmitContext& ctx, Id a, Id b) { return ctx.OpIMul(ctx.U32[1], a, b); } -void EmitSPIRV::EmitINeg32(EmitContext&) { +void EmitINeg32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitIAbs32(EmitContext&) { +void EmitIAbs32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } -void EmitSPIRV::EmitShiftRightLogical32(EmitContext&) { +void EmitShiftRightLogical32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitShiftRightArithmetic32(EmitContext&) { +void EmitShiftRightArithmetic32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseAnd32(EmitContext&) { +void EmitBitwiseAnd32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseOr32(EmitContext&) { +void EmitBitwiseOr32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseXor32(EmitContext&) { +void EmitBitwiseXor32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitFieldInsert(EmitContext&) { +void EmitBitFieldInsert(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitFieldSExtract(EmitContext&) { +void EmitBitFieldSExtract(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { +Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { return ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count); } -Id EmitSPIRV::EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitULessThan(EmitContext&) { +void EmitULessThan(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitIEqual(EmitContext&) { +void EmitIEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSLessThanEqual(EmitContext&) { +void EmitSLessThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitULessThanEqual(EmitContext&) { +void EmitULessThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitUGreaterThan(EmitContext&) { +void EmitUGreaterThan(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitINotEqual(EmitContext&) { +void EmitINotEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSGreaterThanEqual(EmitContext&) { +void EmitSGreaterThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitLogicalOr(EmitContext&) { +void EmitLogicalOr(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalAnd(EmitContext&) { +void EmitLogicalAnd(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalXor(EmitContext&) { +void EmitLogicalXor(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalNot(EmitContext&) { +void EmitLogicalNot(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index 7b43c4ed8..ff2f4fb74 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -6,83 +6,83 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitConvertS16F16(EmitContext&) { +void EmitConvertS16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS16F32(EmitContext&) { +void EmitConvertS16F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS16F64(EmitContext&) { +void EmitConvertS16F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F16(EmitContext&) { +void EmitConvertS32F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F32(EmitContext&) { +void EmitConvertS32F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F64(EmitContext&) { +void EmitConvertS32F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F16(EmitContext&) { +void EmitConvertS64F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F32(EmitContext&) { +void EmitConvertS64F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F64(EmitContext&) { +void EmitConvertS64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F16(EmitContext&) { +void EmitConvertU16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F32(EmitContext&) { +void EmitConvertU16F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F64(EmitContext&) { +void EmitConvertU16F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F16(EmitContext&) { +void EmitConvertU32F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F32(EmitContext&) { +void EmitConvertU32F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F64(EmitContext&) { +void EmitConvertU32F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F16(EmitContext&) { +void EmitConvertU64F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F32(EmitContext&) { +void EmitConvertU64F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F64(EmitContext&) { +void EmitConvertU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64U32(EmitContext&) { +void EmitConvertU64U32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32U64(EmitContext&) { +void EmitConvertU32U64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 5769a3c95..77d698ffd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -22,79 +22,79 @@ static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } -void EmitSPIRV::EmitLoadGlobalU8(EmitContext&) { +void EmitLoadGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalS8(EmitContext&) { +void EmitLoadGlobalS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalU16(EmitContext&) { +void EmitLoadGlobalU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalS16(EmitContext&) { +void EmitLoadGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal32(EmitContext&) { +void EmitLoadGlobal32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal64(EmitContext&) { +void EmitLoadGlobal64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal128(EmitContext&) { +void EmitLoadGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalU8(EmitContext&) { +void EmitWriteGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalS8(EmitContext&) { +void EmitWriteGlobalS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalU16(EmitContext&) { +void EmitWriteGlobalU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalS16(EmitContext&) { +void EmitWriteGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal32(EmitContext&) { +void EmitWriteGlobal32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal64(EmitContext&) { +void EmitWriteGlobal64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal128(EmitContext&) { +void EmitWriteGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageU8(EmitContext&) { +void EmitLoadStorageU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageS8(EmitContext&) { +void EmitLoadStorageS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageU16(EmitContext&) { +void EmitLoadStorageU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageS16(EmitContext&) { +void EmitLoadStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); @@ -105,31 +105,31 @@ Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, return ctx.OpLoad(ctx.U32[1], pointer); } -void EmitSPIRV::EmitLoadStorage64(EmitContext&) { +void EmitLoadStorage64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorage128(EmitContext&) { +void EmitLoadStorage128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageU8(EmitContext&) { +void EmitWriteStorageU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageS8(EmitContext&) { +void EmitWriteStorageS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageU16(EmitContext&) { +void EmitWriteStorageU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageS16(EmitContext&) { +void EmitWriteStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); @@ -140,11 +140,11 @@ void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ctx.OpStore(pointer, value); } -void EmitSPIRV::EmitWriteStorage64(EmitContext&) { +void EmitWriteStorage64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage128(EmitContext&) { +void EmitWriteStorage128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 40a856f72..8d5062724 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -6,19 +6,19 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitSelect8(EmitContext&) { +void EmitSelect8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect16(EmitContext&) { +void EmitSelect16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect32(EmitContext&) { +void EmitSelect32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect64(EmitContext&) { +void EmitSelect64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index c1ed8f281..19b06dbe4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -6,23 +6,23 @@ namespace Shader::Backend::SPIRV { -Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { +Id EmitUndefU1(EmitContext& ctx) { return ctx.OpUndef(ctx.U1); } -Id EmitSPIRV::EmitUndefU8(EmitContext&) { +Id EmitUndefU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUndefU16(EmitContext&) { +Id EmitUndefU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUndefU32(EmitContext& ctx) { +Id EmitUndefU32(EmitContext& ctx) { return ctx.OpUndef(ctx.U32[1]); } -Id EmitSPIRV::EmitUndefU64(EmitContext&) { +Id EmitUndefU64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index f6230e817..0ba681fb9 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -1,5 +1,7 @@ #pragma once +#include + #include "common/common_types.h" namespace Shader { @@ -8,7 +10,9 @@ class Environment { public: virtual ~Environment() = default; - [[nodiscard]] virtual u64 ReadInstruction(u32 address) const = 0; + [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + + [[nodiscard]] virtual std::array WorkgroupSize() = 0; }; } // namespace Shader diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index b34bf462b..5127523f9 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp @@ -29,7 +29,7 @@ FileEnvironment::FileEnvironment(const char* path) { FileEnvironment::~FileEnvironment() = default; -u64 FileEnvironment::ReadInstruction(u32 offset) const { +u64 FileEnvironment::ReadInstruction(u32 offset) { if (offset % 8 != 0) { throw InvalidArgument("offset={} is not aligned to 8", offset); } @@ -39,4 +39,8 @@ u64 FileEnvironment::ReadInstruction(u32 offset) const { return data[offset / 8]; } +std::array FileEnvironment::WorkgroupSize() { + return {1, 1, 1}; +} + } // namespace Shader diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index c294bc6fa..b8c4bbadd 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -12,7 +12,9 @@ public: explicit FileEnvironment(const char* path); ~FileEnvironment() override; - u64 ReadInstruction(u32 offset) const override; + u64 ReadInstruction(u32 offset) override; + + std::array WorkgroupSize() override; private: std::vector data; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 5ae91dd7d..ec029dfd6 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -127,6 +127,8 @@ static std::string ArgToIndex(const std::map& block_to_ind return fmt::format("#{}", arg.U32()); case Type::U64: return fmt::format("#{}", arg.U64()); + case Type::F32: + return fmt::format("#{}", arg.F32()); case Type::Reg: return fmt::format("{}", arg.Reg()); case Type::Pred: diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp index a48b8dec5..8709a2ea1 100644 --- a/src/shader_recompiler/frontend/ir/post_order.cpp +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -28,7 +28,7 @@ BlockList PostOrder(const BlockList& blocks) { if (!visited.insert(branch).second) { return false; } - // Calling push_back twice is faster than insert on msvc + // Calling push_back twice is faster than insert on MSVC block_stack.push_back(block); block_stack.push_back(branch); return true; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8331d576c..8c44ebb29 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,7 +69,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool(value)); } +IR::U32 TranslatorVisitor::GetReg8(u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + IR::U32 TranslatorVisitor::GetReg20(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index b701605d7..8bd468244 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -301,6 +301,7 @@ public: void X(IR::Reg dest_reg, const IR::U32& value); void F(IR::Reg dest_reg, const IR::F32& value); + [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); [[nodiscard]] IR::F32 GetReg20F(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 1f83d1068..c3c4b9abd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -10,36 +10,35 @@ namespace Shader::Maxwell { namespace { -union MOV { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<20, 8, IR::Reg> src_reg; - BitField<39, 4, u64> mask; -}; - -void CheckMask(MOV mov) { - if (mov.mask != 0xf) { +void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 4, u64> mask; + BitField<12, 4, u64> mov32i_mask; + } const mov{insn}; + + if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { throw NotImplementedException("Non-full move mask"); } + v.X(mov.dest_reg, src); } } // Anonymous namespace void TranslatorVisitor::MOV_reg(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, X(mov.src_reg)); + MOV(*this, insn, GetReg8(insn)); } void TranslatorVisitor::MOV_cbuf(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetCbuf(insn)); + MOV(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::MOV_imm(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetImm20(insn)); + MOV(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::MOV32I(u64 insn) { + MOV(*this, insn, GetImm32(insn), true); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 1bb160acb..6b2a1356b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -617,10 +617,6 @@ void TranslatorVisitor::MEMBAR(u64) { ThrowNotImplemented(Opcode::MEMBAR); } -void TranslatorVisitor::MOV32I(u64) { - ThrowNotImplemented(Opcode::MOV32I); -} - void TranslatorVisitor::NOP(u64) { ThrowNotImplemented(Opcode::NOP); } diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 1610bb34e..050a37f18 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -76,5 +76,5 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - Backend::SPIRV::EmitSPIRV spirv{program}; + void(Backend::SPIRV::EmitSPIRV(env, program)); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h new file mode 100644 index 000000000..c96d783b7 --- /dev/null +++ b/src/shader_recompiler/profile.h @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +struct Profile { + bool unified_descriptor_binding; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp new file mode 100644 index 000000000..b25081e39 --- /dev/null +++ b/src/shader_recompiler/recompiler.cpp @@ -0,0 +1,27 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/common_types.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/recompiler.h" + +namespace Shader { + +std::pair> RecompileSPIRV(Environment& env, u32 start_address) { + ObjectPool flow_block_pool; + ObjectPool inst_pool; + ObjectPool block_pool; + + Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; +} + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h new file mode 100644 index 000000000..4cb973878 --- /dev/null +++ b/src/shader_recompiler/recompiler.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader { + +[[nodiscard]] std::pair> RecompileSPIRV(Environment& env, u32 start_address); + +} // namespace Shader diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c5ce71706..3323e6916 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -43,9 +43,6 @@ add_library(video_core STATIC engines/maxwell_3d.h engines/maxwell_dma.cpp engines/maxwell_dma.h - engines/shader_bytecode.h - engines/shader_header.h - engines/shader_type.h framebuffer_config.h macro/macro.cpp macro/macro.h @@ -123,6 +120,7 @@ add_library(video_core STATIC renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_query_cache.cpp renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp @@ -201,7 +199,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad xbyak) +target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 0d7683c2d..f8b8d06ac 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -12,7 +12,6 @@ #include "common/common_types.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h deleted file mode 100644 index 8b45f1b62..000000000 --- a/src/video_core/engines/shader_bytecode.h +++ /dev/null @@ -1,2298 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -struct Register { - /// Number of registers - static constexpr std::size_t NumRegisters = 256; - - /// Register 255 is special cased to always be 0 - static constexpr std::size_t ZeroIndex = 255; - - enum class Size : u64 { - Byte = 0, - Short = 1, - Word = 2, - Long = 3, - }; - - constexpr Register() = default; - - constexpr Register(u64 value_) : value(value_) {} - - [[nodiscard]] constexpr operator u64() const { - return value; - } - - template - [[nodiscard]] constexpr u64 operator-(const T& oth) const { - return value - oth; - } - - template - [[nodiscard]] constexpr u64 operator&(const T& oth) const { - return value & oth; - } - - [[nodiscard]] constexpr u64 operator&(const Register& oth) const { - return value & oth.value; - } - - [[nodiscard]] constexpr u64 operator~() const { - return ~value; - } - - [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { - elem = (value + elem) & 3; - return (value & ~3) + elem; - } - -private: - u64 value{}; -}; - -enum class AttributeSize : u64 { - Word = 0, - DoubleWord = 1, - TripleWord = 2, - QuadWord = 3, -}; - -union Attribute { - Attribute() = default; - - constexpr explicit Attribute(u64 value_) : value(value_) {} - - enum class Index : u64 { - LayerViewportPointSize = 6, - Position = 7, - Attribute_0 = 8, - Attribute_31 = 39, - FrontColor = 40, - FrontSecondaryColor = 41, - BackColor = 42, - BackSecondaryColor = 43, - ClipDistances0123 = 44, - ClipDistances4567 = 45, - PointCoord = 46, - // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex - // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval - // shader. - TessCoordInstanceIDVertexID = 47, - TexCoord_0 = 48, - TexCoord_7 = 55, - // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment - // shader. It is unknown what the other values contain. - FrontFacing = 63, - }; - - union { - BitField<20, 10, u64> immediate; - BitField<22, 2, u64> element; - BitField<24, 6, Index> index; - BitField<31, 1, u64> patch; - BitField<47, 3, AttributeSize> size; - - [[nodiscard]] bool IsPhysical() const { - return patch == 0 && element == 0 && static_cast(index.Value()) == 0; - } - } fmt20; - - union { - BitField<30, 2, u64> element; - BitField<32, 6, Index> index; - } fmt28; - - BitField<39, 8, u64> reg; - u64 value{}; -}; - -union Sampler { - Sampler() = default; - - constexpr explicit Sampler(u64 value_) : value(value_) {} - - enum class Index : u64 { - Sampler_0 = 8, - }; - - BitField<36, 13, Index> index; - u64 value{}; -}; - -union Image { - Image() = default; - - constexpr explicit Image(u64 value_) : value{value_} {} - - BitField<36, 13, u64> index; - u64 value; -}; - -} // namespace Tegra::Shader - -namespace std { - -// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330. -template <> -struct make_unsigned { - using type = Tegra::Shader::Attribute; -}; - -template <> -struct make_unsigned { - using type = Tegra::Shader::Register; -}; - -} // namespace std - -namespace Tegra::Shader { - -enum class Pred : u64 { - UnusedIndex = 0x7, - NeverExecute = 0xF, -}; - -enum class PredCondition : u64 { - F = 0, // Always false - LT = 1, // Ordered less than - EQ = 2, // Ordered equal - LE = 3, // Ordered less than or equal - GT = 4, // Ordered greater than - NE = 5, // Ordered not equal - GE = 6, // Ordered greater than or equal - NUM = 7, // Ordered - NAN_ = 8, // Unordered - LTU = 9, // Unordered less than - EQU = 10, // Unordered equal - LEU = 11, // Unordered less than or equal - GTU = 12, // Unordered greater than - NEU = 13, // Unordered not equal - GEU = 14, // Unordered greater than or equal - T = 15, // Always true -}; - -enum class PredOperation : u64 { - And = 0, - Or = 1, - Xor = 2, -}; - -enum class LogicOperation : u64 { - And = 0, - Or = 1, - Xor = 2, - PassB = 3, -}; - -enum class SubOp : u64 { - Cos = 0x0, - Sin = 0x1, - Ex2 = 0x2, - Lg2 = 0x3, - Rcp = 0x4, - Rsq = 0x5, - Sqrt = 0x8, -}; - -enum class F2iRoundingOp : u64 { - RoundEven = 0, - Floor = 1, - Ceil = 2, - Trunc = 3, -}; - -enum class F2fRoundingOp : u64 { - None = 0, - Pass = 3, - Round = 8, - Floor = 9, - Ceil = 10, - Trunc = 11, -}; - -enum class AtomicOp : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, - SafeAdd = 10, -}; - -enum class GlobalAtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32_FTZ_RN = 3, - F16x2_FTZ_RN = 4, - S64 = 5, -}; - -enum class UniformType : u64 { - UnsignedByte = 0, - SignedByte = 1, - UnsignedShort = 2, - SignedShort = 3, - Single = 4, - Double = 5, - Quad = 6, - UnsignedQuad = 7, -}; - -enum class StoreType : u64 { - Unsigned8 = 0, - Signed8 = 1, - Unsigned16 = 2, - Signed16 = 3, - Bits32 = 4, - Bits64 = 5, - Bits128 = 6, -}; - -enum class AtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - S64 = 3, -}; - -enum class IMinMaxExchange : u64 { - None = 0, - XLo = 1, - XMed = 2, - XHi = 3, -}; - -enum class VideoType : u64 { - Size16_Low = 0, - Size16_High = 1, - Size32 = 2, - Invalid = 3, -}; - -enum class VmadShr : u64 { - Shr7 = 1, - Shr15 = 2, -}; - -enum class VmnmxType : u64 { - Bits8, - Bits16, - Bits32, -}; - -enum class VmnmxOperation : u64 { - Mrg_16H = 0, - Mrg_16L = 1, - Mrg_8B0 = 2, - Mrg_8B2 = 3, - Acc = 4, - Min = 5, - Max = 6, - Nop = 7, -}; - -enum class XmadMode : u64 { - None = 0, - CLo = 1, - CHi = 2, - CSfu = 3, - CBcc = 4, -}; - -enum class IAdd3Mode : u64 { - None = 0, - RightShift = 1, - LeftShift = 2, -}; - -enum class IAdd3Height : u64 { - None = 0, - LowerHalfWord = 1, - UpperHalfWord = 2, -}; - -enum class FlowCondition : u64 { - Always = 0xF, - Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? -}; - -enum class ConditionCode : u64 { - F = 0, - LT = 1, - EQ = 2, - LE = 3, - GT = 4, - NE = 5, - GE = 6, - Num = 7, - Nan = 8, - LTU = 9, - EQU = 10, - LEU = 11, - GTU = 12, - NEU = 13, - GEU = 14, - T = 15, - OFF = 16, - LO = 17, - SFF = 18, - LS = 19, - HI = 20, - SFT = 21, - HS = 22, - OFT = 23, - CSM_TA = 24, - CSM_TR = 25, - CSM_MX = 26, - FCSM_TA = 27, - FCSM_TR = 28, - FCSM_MX = 29, - RLE = 30, - RGT = 31, -}; - -enum class PredicateResultMode : u64 { - None = 0x0, - NotZero = 0x3, -}; - -enum class TextureType : u64 { - Texture1D = 0, - Texture2D = 1, - Texture3D = 2, - TextureCube = 3, -}; - -enum class TextureQueryType : u64 { - Dimension = 1, - TextureType = 2, - SamplePosition = 5, - Filter = 16, - LevelOfDetail = 18, - Wrap = 20, - BorderColor = 22, -}; - -enum class TextureProcessMode : u64 { - None = 0, - LZ = 1, // Load LOD of zero. - LB = 2, // Load Bias. - LL = 3, // Load LOD. - LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB. - LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL. -}; - -enum class TextureMiscMode : u64 { - DC, - AOFFI, // Uses Offset - NDV, - NODEP, - MZ, - PTP, -}; - -enum class SurfaceDataMode : u64 { - P = 0, - D_BA = 1, -}; - -enum class OutOfBoundsStore : u64 { - Ignore = 0, - Clamp = 1, - Trap = 2, -}; - -enum class ImageType : u64 { - Texture1D = 0, - TextureBuffer = 1, - Texture1DArray = 2, - Texture2D = 3, - Texture2DArray = 4, - Texture3D = 5, -}; - -enum class IsberdMode : u64 { - None = 0, - Patch = 1, - Prim = 2, - Attr = 3, -}; - -enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; - -enum class MembarType : u64 { - CTA = 0, - GL = 1, - SYS = 2, - VC = 3, -}; - -enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 }; - -enum class HalfType : u64 { - H0_H1 = 0, - F32 = 1, - H0_H0 = 2, - H1_H1 = 3, -}; - -enum class HalfMerge : u64 { - H0_H1 = 0, - F32 = 1, - Mrg_H0 = 2, - Mrg_H1 = 3, -}; - -enum class HalfPrecision : u64 { - None = 0, - FTZ = 1, - FMZ = 2, -}; - -enum class R2pMode : u64 { - Pr = 0, - Cc = 1, -}; - -enum class IpaInterpMode : u64 { - Pass = 0, - Multiply = 1, - Constant = 2, - Sc = 3, -}; - -enum class IpaSampleMode : u64 { - Default = 0, - Centroid = 1, - Offset = 2, -}; - -enum class LmemLoadCacheManagement : u64 { - Default = 0, - LU = 1, - CI = 2, - CV = 3, -}; - -enum class StoreCacheManagement : u64 { - Default = 0, - CG = 1, - CS = 2, - WT = 3, -}; - -struct IpaMode { - IpaInterpMode interpolation_mode; - IpaSampleMode sampling_mode; - - [[nodiscard]] bool operator==(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) == - std::tie(a.interpolation_mode, a.sampling_mode); - } - [[nodiscard]] bool operator!=(const IpaMode& a) const { - return !operator==(a); - } - [[nodiscard]] bool operator<(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) < - std::tie(a.interpolation_mode, a.sampling_mode); - } -}; - -enum class SystemVariable : u64 { - LaneId = 0x00, - VirtCfg = 0x02, - VirtId = 0x03, - Pm0 = 0x04, - Pm1 = 0x05, - Pm2 = 0x06, - Pm3 = 0x07, - Pm4 = 0x08, - Pm5 = 0x09, - Pm6 = 0x0a, - Pm7 = 0x0b, - OrderingTicket = 0x0f, - PrimType = 0x10, - InvocationId = 0x11, - Ydirection = 0x12, - ThreadKill = 0x13, - ShaderType = 0x14, - DirectBeWriteAddressLow = 0x15, - DirectBeWriteAddressHigh = 0x16, - DirectBeWriteEnabled = 0x17, - MachineId0 = 0x18, - MachineId1 = 0x19, - MachineId2 = 0x1a, - MachineId3 = 0x1b, - Affinity = 0x1c, - InvocationInfo = 0x1d, - WscaleFactorXY = 0x1e, - WscaleFactorZ = 0x1f, - Tid = 0x20, - TidX = 0x21, - TidY = 0x22, - TidZ = 0x23, - CtaParam = 0x24, - CtaIdX = 0x25, - CtaIdY = 0x26, - CtaIdZ = 0x27, - NtId = 0x28, - CirQueueIncrMinusOne = 0x29, - Nlatc = 0x2a, - SmSpaVersion = 0x2c, - MultiPassShaderInfo = 0x2d, - LwinHi = 0x2e, - SwinHi = 0x2f, - SwinLo = 0x30, - SwinSz = 0x31, - SmemSz = 0x32, - SmemBanks = 0x33, - LwinLo = 0x34, - LwinSz = 0x35, - LmemLosz = 0x36, - LmemHioff = 0x37, - EqMask = 0x38, - LtMask = 0x39, - LeMask = 0x3a, - GtMask = 0x3b, - GeMask = 0x3c, - RegAlloc = 0x3d, - CtxAddr = 0x3e, // .fmask = F_SM50 - BarrierAlloc = 0x3e, // .fmask = F_SM60 - GlobalErrorStatus = 0x40, - WarpErrorStatus = 0x42, - WarpErrorStatusClear = 0x43, - PmHi0 = 0x48, - PmHi1 = 0x49, - PmHi2 = 0x4a, - PmHi3 = 0x4b, - PmHi4 = 0x4c, - PmHi5 = 0x4d, - PmHi6 = 0x4e, - PmHi7 = 0x4f, - ClockLo = 0x50, - ClockHi = 0x51, - GlobalTimerLo = 0x52, - GlobalTimerHi = 0x53, - HwTaskId = 0x60, - CircularQueueEntryIndex = 0x61, - CircularQueueEntryAddressLow = 0x62, - CircularQueueEntryAddressHigh = 0x63, -}; - -enum class PhysicalAttributeDirection : u64 { - Input = 0, - Output = 1, -}; - -enum class VoteOperation : u64 { - All = 0, // allThreadsNV - Any = 1, // anyThreadNV - Eq = 2, // allThreadsEqualNV -}; - -enum class ImageAtomicOperationType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32 = 3, - S64 = 5, - SD32 = 6, - SD64 = 7, -}; - -enum class ImageAtomicOperation : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, -}; - -enum class ShuffleOperation : u64 { - Idx = 0, // shuffleNV - Up = 1, // shuffleUpNV - Down = 2, // shuffleDownNV - Bfly = 3, // shuffleXorNV -}; - -enum class ShfType : u64 { - Bits32 = 0, - U64 = 2, - S64 = 3, -}; - -enum class ShfXmode : u64 { - None = 0, - HI = 1, - X = 2, - XHI = 3, -}; - -union Instruction { - constexpr Instruction& operator=(const Instruction& instr) { - value = instr.value; - return *this; - } - - constexpr Instruction(u64 value_) : value{value_} {} - constexpr Instruction(const Instruction& instr) : value(instr.value) {} - - [[nodiscard]] constexpr bool Bit(u64 offset) const { - return ((value >> offset) & 1) != 0; - } - - BitField<0, 8, Register> gpr0; - BitField<8, 8, Register> gpr8; - union { - BitField<16, 4, Pred> full_pred; - BitField<16, 3, u64> pred_index; - } pred; - BitField<19, 1, u64> negate_pred; - BitField<20, 8, Register> gpr20; - BitField<20, 4, SubOp> sub_op; - BitField<28, 8, Register> gpr28; - BitField<39, 8, Register> gpr39; - BitField<48, 16, u64> opcode; - - union { - BitField<8, 5, ConditionCode> cc; - BitField<13, 1, u64> trigger; - } nop; - - union { - BitField<48, 2, VoteOperation> operation; - BitField<45, 3, u64> dest_pred; - BitField<39, 3, u64> value; - BitField<42, 1, u64> negate_value; - } vote; - - union { - BitField<30, 2, ShuffleOperation> operation; - BitField<48, 3, u64> pred48; - BitField<28, 1, u64> is_index_imm; - BitField<29, 1, u64> is_mask_imm; - BitField<20, 5, u64> index_imm; - BitField<34, 13, u64> mask_imm; - } shfl; - - union { - BitField<44, 1, u64> ftz; - BitField<39, 2, u64> tab5cb8_2; - BitField<38, 1, u64> ndv; - BitField<47, 1, u64> cc; - BitField<28, 8, u64> swizzle; - } fswzadd; - - union { - BitField<8, 8, Register> gpr; - BitField<20, 24, s64> offset; - } gmem; - - union { - BitField<20, 16, u64> imm20_16; - BitField<20, 19, u64> imm20_19; - BitField<20, 32, s64> imm20_32; - BitField<45, 1, u64> negate_b; - BitField<46, 1, u64> abs_a; - BitField<48, 1, u64> negate_a; - BitField<49, 1, u64> abs_b; - BitField<50, 1, u64> saturate_d; - BitField<56, 1, u64> negate_imm; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - } fmnmx; - - union { - BitField<39, 1, u64> invert_a; - BitField<40, 1, u64> invert_b; - BitField<41, 2, LogicOperation> operation; - BitField<44, 2, PredicateResultMode> pred_result_mode; - BitField<48, 3, Pred> pred48; - } lop; - - union { - BitField<53, 2, LogicOperation> operation; - BitField<55, 1, u64> invert_a; - BitField<56, 1, u64> invert_b; - } lop32i; - - union { - BitField<28, 8, u64> imm_lut28; - BitField<48, 8, u64> imm_lut48; - - [[nodiscard]] u32 GetImmLut28() const { - return static_cast(imm_lut28); - } - - [[nodiscard]] u32 GetImmLut48() const { - return static_cast(imm_lut48); - } - } lop3; - - [[nodiscard]] u16 GetImm20_16() const { - return static_cast(imm20_16); - } - - [[nodiscard]] u32 GetImm20_19() const { - u32 imm{static_cast(imm20_19)}; - imm <<= 12; - imm |= negate_imm ? 0x80000000 : 0; - return imm; - } - - [[nodiscard]] u32 GetImm20_32() const { - return static_cast(imm20_32); - } - - [[nodiscard]] s32 GetSignedImm20_20() const { - const auto immediate = static_cast(imm20_19 | (negate_imm << 19)); - // Sign extend the 20-bit value. - const auto mask = 1U << (20 - 1); - return static_cast((immediate ^ mask) - mask); - } - } alu; - - union { - BitField<38, 1, u64> idx; - BitField<51, 1, u64> saturate; - BitField<52, 2, IpaSampleMode> sample_mode; - BitField<54, 2, IpaInterpMode> interp_mode; - } ipa; - - union { - BitField<39, 2, u64> tab5cb8_2; - BitField<41, 3, u64> postfactor; - BitField<44, 2, u64> tab5c68_0; - BitField<48, 1, u64> negate_b; - } fmul; - - union { - BitField<55, 1, u64> saturate; - } fmul32; - - union { - BitField<52, 1, u64> generates_cc; - } op_32; - - union { - BitField<48, 1, u64> is_signed; - } shift; - - union { - BitField<39, 1, u64> wrap; - } shr; - - union { - BitField<37, 2, ShfType> type; - BitField<48, 2, ShfXmode> xmode; - BitField<50, 1, u64> wrap; - BitField<20, 6, u64> immediate; - } shf; - - union { - BitField<39, 5, u64> shift_amount; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_a; - } alu_integer; - - union { - BitField<43, 1, u64> x; - } iadd; - - union { - BitField<39, 1, u64> ftz; - BitField<32, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - - BitField<35, 2, HalfType> type_c; - } alu_half; - - union { - BitField<39, 2, HalfPrecision> precision; - BitField<39, 1, u64> ftz; - BitField<52, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - } alu_half_imm; - - union { - BitField<29, 1, u64> first_negate; - BitField<20, 9, u64> first; - - BitField<56, 1, u64> second_negate; - BitField<30, 9, u64> second; - - [[nodiscard]] u32 PackImmediates() const { - // Immediates are half floats shifted. - constexpr u32 imm_shift = 6; - return static_cast((first << imm_shift) | (second << (16 + imm_shift))); - } - } half_imm; - - union { - union { - BitField<37, 2, HalfPrecision> precision; - BitField<32, 1, u64> saturate; - - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> negate_c; - BitField<35, 2, HalfType> type_c; - } rr; - - BitField<57, 2, HalfPrecision> precision; - BitField<52, 1, u64> saturate; - - BitField<49, 2, HalfMerge> merge; - - BitField<47, 2, HalfType> type_a; - - BitField<56, 1, u64> negate_b; - BitField<28, 2, HalfType> type_b; - - BitField<51, 1, u64> negate_c; - BitField<53, 2, HalfType> type_reg39; - } hfma2; - - union { - BitField<40, 1, u64> invert; - } popc; - - union { - BitField<41, 1, u64> sh; - BitField<40, 1, u64> invert; - BitField<48, 1, u64> is_signed; - } flo; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> neg_pred; - } sel; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - BitField<43, 2, IMinMaxExchange> exchange; - BitField<48, 1, u64> is_signed; - } imnmx; - - union { - BitField<31, 2, IAdd3Height> height_c; - BitField<33, 2, IAdd3Height> height_b; - BitField<35, 2, IAdd3Height> height_a; - BitField<37, 2, IAdd3Mode> mode; - BitField<49, 1, u64> neg_c; - BitField<50, 1, u64> neg_b; - BitField<51, 1, u64> neg_a; - } iadd3; - - union { - BitField<54, 1, u64> saturate; - BitField<56, 1, u64> negate_a; - } iadd32i; - - union { - BitField<53, 1, u64> negate_b; - BitField<54, 1, u64> abs_a; - BitField<56, 1, u64> negate_a; - BitField<57, 1, u64> abs_b; - } fadd32i; - - union { - BitField<40, 1, u64> brev; - BitField<47, 1, u64> rd_cc; - BitField<48, 1, u64> is_signed; - } bfe; - - union { - BitField<48, 3, u64> pred48; - - union { - BitField<20, 20, u64> entry_a; - BitField<39, 5, u64> entry_b; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } imm; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<56, 1, u64> neg; - BitField<57, 1, u64> uses_cc; - } hi; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } rz; - - union { - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } r1; - - union { - BitField<28, 8, u64> entry_a; - BitField<37, 1, u64> neg; - BitField<38, 1, u64> uses_cc; - } r2; - - } lea; - - union { - BitField<0, 5, FlowCondition> cond; - } flow; - - union { - BitField<47, 1, u64> cc; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_c; - BitField<51, 2, u64> tab5980_1; - BitField<53, 2, u64> tab5980_0; - } ffma; - - union { - BitField<48, 3, UniformType> type; - BitField<44, 2, u64> unknown; - } ld_c; - - union { - BitField<48, 3, StoreType> type; - } ldst_sl; - - union { - BitField<44, 2, u64> unknown; - } ld_l; - - union { - BitField<44, 2, StoreCacheManagement> cache_management; - } st_l; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } ldg; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } stg; - - union { - BitField<23, 3, AtomicOp> operation; - BitField<48, 1, u64> extended; - BitField<20, 3, GlobalAtomicType> type; - } red; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<49, 3, GlobalAtomicType> type; - BitField<28, 20, s64> offset; - } atom; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<28, 2, AtomicType> type; - BitField<30, 22, s64> offset; - - [[nodiscard]] s32 GetImmediateOffset() const { - return static_cast(offset << 2); - } - } atoms; - - union { - BitField<32, 1, PhysicalAttributeDirection> direction; - BitField<47, 3, AttributeSize> size; - BitField<20, 11, u64> address; - } al2p; - - union { - BitField<53, 3, UniformType> type; - BitField<52, 1, u64> extended; - } generic; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<6, 1, u64> neg_b; - BitField<7, 1, u64> abs_a; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fsetp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } isetp; - - union { - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } icmp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 2, PredOperation> op; - } psetp; - - union { - BitField<43, 4, PredCondition> cond; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<39, 3, u64> pred39; - } vsetp; - - union { - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - } pset; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<8, 5, ConditionCode> cc; // flag in cc - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 4, PredOperation> op; // op with pred39 - } csetp; - - union { - BitField<6, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - union { - BitField<35, 4, PredCondition> cond; - BitField<49, 1, u64> h_and; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - } reg; - union { - BitField<56, 1, u64> negate_b; - BitField<54, 1, u64> abs_b; - } cbuf; - union { - BitField<49, 4, PredCondition> cond; - BitField<53, 1, u64> h_and; - } cbuf_and_imm; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hsetp2; - - union { - BitField<40, 1, R2pMode> mode; - BitField<41, 2, u64> byte; - BitField<20, 7, u64> immediate_mask; - } p2r_r2p; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<48, 4, PredCondition> cond; - BitField<52, 1, u64> bf; - BitField<53, 1, u64> neg_b; - BitField<54, 1, u64> abs_a; - BitField<55, 1, u64> ftz; - } fset; - - union { - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fcmp; - - union { - BitField<49, 1, u64> bf; - BitField<35, 3, PredCondition> cond; - BitField<50, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hset2; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } iset; - - union { - BitField<45, 1, u64> negate_a; - BitField<49, 1, u64> abs_a; - BitField<10, 2, Register::Size> src_size; - BitField<13, 1, u64> is_input_signed; - BitField<8, 2, Register::Size> dst_size; - BitField<12, 1, u64> is_output_signed; - - union { - BitField<39, 2, u64> tab5cb8_2; - } i2f; - - union { - BitField<39, 2, F2iRoundingOp> rounding; - } f2i; - - union { - BitField<39, 4, u64> rounding; - // H0, H1 extract for F16 missing - BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value - [[nodiscard]] F2fRoundingOp GetRoundingMode() const { - constexpr u64 rounding_mask = 0x0B; - return static_cast(rounding.Value() & rounding_mask); - } - } f2f; - - union { - BitField<41, 2, u64> selector; - } int_src; - - union { - BitField<41, 1, u64> selector; - } float_src; - } conversion; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 1, u64> aoffi_flag; - BitField<55, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<36, 1, u64> aoffi_flag; - BitField<37, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex_b; - - union { - BitField<22, 6, TextureQueryType> query_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - } txq; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return (ndv_flag != 0); - case TextureMiscMode::NODEP: - return (nodep_flag != 0); - default: - break; - } - return false; - } - } tmml; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 2, u64> offset_mode; - BitField<56, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4; - - union { - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<33, 2, u64> offset_mode; - BitField<37, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4_b; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<51, 1, u64> aoffi_flag; - BitField<52, 2, u64> component; - BitField<55, 1, u64> fp16_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tld4s; - - union { - BitField<0, 8, Register> gpr0; - BitField<28, 8, Register> gpr28; - BitField<49, 1, u64> nodep_flag; - BitField<50, 3, u64> component_mask_selector; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TEXS instruction has a weird encoding for the texture type. - if (texture_info == 0) { - return TextureType::Texture1D; - } - if (texture_info >= 1 && texture_info <= 9) { - return TextureType::Texture2D; - } - if (texture_info >= 10 && texture_info <= 11) { - return TextureType::Texture3D; - } - if (texture_info >= 12 && texture_info <= 13) { - return TextureType::TextureCube; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - switch (texture_info) { - case 0: - case 2: - case 6: - case 8: - case 9: - case 11: - return TextureProcessMode::LZ; - case 3: - case 5: - case 13: - return TextureProcessMode::LL; - default: - break; - } - return TextureProcessMode::None; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info >= 7 && texture_info <= 9; - } - - [[nodiscard]] bool HasTwoDestinations() const { - return gpr28.Value() != Register::ZeroIndex; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - static constexpr std::array, 4> mask_lut{{ - {}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x7, 0xb, 0xd, 0xe, 0xf}, - }}; - - std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; - index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; - - u32 mask = mask_lut[index][component_mask_selector]; - // A mask of 0 means this instruction uses an unimplemented mask. - ASSERT(mask != 0); - return ((1ull << component) & mask) != 0; - } - } texs; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> ms; // Multisample? - BitField<54, 1, u64> cl; - BitField<55, 1, u64> process_mode; - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; - } - } tld; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TLDS instruction has a weird encoding for the texture type. - if (texture_info <= 1) { - return TextureType::Texture1D; - } - if (texture_info == 2 || texture_info == 8 || texture_info == 12 || - (texture_info >= 4 && texture_info <= 6)) { - return TextureType::Texture2D; - } - if (texture_info == 7) { - return TextureType::Texture3D; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - if (texture_info == 1 || texture_info == 5 || texture_info == 12) { - return TextureProcessMode::LL; - } - return TextureProcessMode::LZ; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return texture_info == 12 || texture_info == 4; - case TextureMiscMode::MZ: - return texture_info == 5; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info == 8; - } - } tlds; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - } txd; - - union { - BitField<24, 2, StoreCacheManagement> cache_management; - BitField<33, 3, ImageType> image_type; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - BitField<51, 1, u64> is_immediate; - BitField<52, 1, SurfaceDataMode> mode; - - BitField<20, 3, StoreType> store_data_layout; - BitField<20, 4, u64> component_mask_selector; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - ASSERT(mode == SurfaceDataMode::P); - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array mask = { - 0, (R), (G), (R | G), (B), (R | B), - (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), - (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask_selector)}.test(component); - } - - [[nodiscard]] StoreType GetStoreDataLayout() const { - ASSERT(mode == SurfaceDataMode::D_BA); - return store_data_layout; - } - } suldst; - - union { - BitField<28, 1, u64> is_ba; - BitField<51, 3, ImageAtomicOperationType> operation_type; - BitField<33, 3, ImageType> image_type; - BitField<29, 4, ImageAtomicOperation> operation; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - } suatom_d; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchTarget() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast(target); - constexpr auto instruction_size = static_cast(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast((target_value ^ mask) - mask) / instruction_size + 1; - } - } bra; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchExtend() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast(target); - constexpr auto instruction_size = static_cast(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast((target_value ^ mask) - mask) / instruction_size + 1; - } - } brx; - - union { - BitField<39, 1, u64> emit; // EmitVertex - BitField<40, 1, u64> cut; // EndPrimitive - } out; - - union { - BitField<31, 1, u64> skew; - BitField<32, 1, u64> o; - BitField<33, 2, IsberdMode> mode; - BitField<47, 2, IsberdShift> shift; - } isberd; - - union { - BitField<8, 2, MembarType> type; - BitField<0, 2, MembarUnknown> unknown; - } membar; - - union { - BitField<48, 1, u64> signed_a; - BitField<38, 1, u64> is_byte_chunk_a; - BitField<36, 2, VideoType> type_a; - BitField<36, 2, u64> byte_height_a; - - BitField<49, 1, u64> signed_b; - BitField<50, 1, u64> use_register_b; - BitField<30, 1, u64> is_byte_chunk_b; - BitField<28, 2, VideoType> type_b; - BitField<28, 2, u64> byte_height_b; - } video; - - union { - BitField<51, 2, VmadShr> shr; - BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) - BitField<47, 1, u64> cc; - } vmad; - - union { - BitField<54, 1, u64> is_dest_signed; - BitField<48, 1, u64> is_src_a_signed; - BitField<49, 1, u64> is_src_b_signed; - BitField<37, 2, u64> src_format_a; - BitField<29, 2, u64> src_format_b; - BitField<56, 1, u64> mx; - BitField<55, 1, u64> sat; - BitField<36, 2, u64> selector_a; - BitField<28, 2, u64> selector_b; - BitField<50, 1, u64> is_op_b_register; - BitField<51, 3, VmnmxOperation> operation; - - [[nodiscard]] VmnmxType SourceFormatA() const { - switch (src_format_a) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - - [[nodiscard]] VmnmxType SourceFormatB() const { - switch (src_format_b) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - } vmnmx; - - union { - BitField<20, 16, u64> imm20_16; - BitField<35, 1, u64> high_b_rr; // used on RR - BitField<36, 1, u64> product_shift_left; - BitField<37, 1, u64> merge_37; - BitField<48, 1, u64> sign_a; - BitField<49, 1, u64> sign_b; - BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC - BitField<50, 3, XmadMode> mode; - BitField<52, 1, u64> high_b; - BitField<53, 1, u64> high_a; - BitField<55, 1, u64> product_shift_left_second; // used on CR - BitField<56, 1, u64> merge_56; - } xmad; - - union { - BitField<20, 14, u64> shifted_offset; - BitField<34, 5, u64> index; - - [[nodiscard]] u64 GetOffset() const { - return shifted_offset * 4; - } - } cbuf34; - - union { - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - - [[nodiscard]] s64 GetOffset() const { - return offset; - } - } cbuf36; - - // Unsure about the size of this one. - // It's always used with a gpr0, so any size should be fine. - BitField<20, 8, SystemVariable> sys20; - - BitField<47, 1, u64> generates_cc; - BitField<61, 1, u64> is_b_imm; - BitField<60, 1, u64> is_b_gpr; - BitField<59, 1, u64> is_c_gpr; - BitField<20, 24, s64> smem_imm; - BitField<0, 5, ConditionCode> flow_condition_code; - - Attribute attribute; - Sampler sampler; - Image image; - - u64 value; -}; -static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); -static_assert(std::is_standard_layout_v, "Instruction is not standard layout"); - -class OpCode { -public: - enum class Id { - KIL, - SSY, - SYNC, - BRK, - DEPBAR, - VOTE, - VOTE_VTG, - SHFL, - FSWZADD, - BFE_C, - BFE_R, - BFE_IMM, - BFI_RC, - BFI_IMM_R, - BRA, - BRX, - PBK, - LD_A, - LD_L, - LD_S, - LD_C, - LD, // Load from generic memory - LDG, // Load from global memory - ST_A, - ST_L, - ST_S, - ST, // Store in generic memory - STG, // Store in global memory - RED, // Reduction operation - ATOM, // Atomic operation on global memory - ATOMS, // Atomic operation on shared memory - AL2P, // Transforms attribute memory into physical memory - TEX, - TEX_B, // Texture Load Bindless - TXQ, // Texture Query - TXQ_B, // Texture Query Bindless - TEXS, // Texture Fetch with scalar/non-vec4 source/destinations - TLD, // Texture Load - TLDS, // Texture Load with scalar/non-vec4 source/destinations - TLD4, // Texture Gather 4 - TLD4_B, // Texture Gather 4 Bindless - TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations - TMML_B, // Texture Mip Map Level - TMML, // Texture Mip Map Level - TXD, // Texture Gradient/Load with Derivates - TXD_B, // Texture Gradient/Load with Derivates Bindless - SUST, // Surface Store - SULD, // Surface Load - SUATOM, // Surface Atomic Operation - EXIT, - NOP, - IPA, - OUT_R, // Emit vertex/primitive - ISBERD, - BAR, - MEMBAR, - VMAD, - VSETP, - VMNMX, - FFMA_IMM, // Fused Multiply and Add - FFMA_CR, - FFMA_RC, - FFMA_RR, - FADD_C, - FADD_R, - FADD_IMM, - FADD32I, - FMUL_C, - FMUL_R, - FMUL_IMM, - FMUL32_IMM, - IADD_C, - IADD_R, - IADD_IMM, - IADD3_C, // Add 3 Integers - IADD3_R, - IADD3_IMM, - IADD32I, - ISCADD_C, // Scale and Add - ISCADD_R, - ISCADD_IMM, - FLO_R, - FLO_C, - FLO_IMM, - LEA_R1, - LEA_R2, - LEA_RZ, - LEA_IMM, - LEA_HI, - HADD2_C, - HADD2_R, - HADD2_IMM, - HMUL2_C, - HMUL2_R, - HMUL2_IMM, - HFMA2_CR, - HFMA2_RC, - HFMA2_RR, - HFMA2_IMM_R, - HSETP2_C, - HSETP2_R, - HSETP2_IMM, - HSET2_C, - HSET2_R, - HSET2_IMM, - POPC_C, - POPC_R, - POPC_IMM, - SEL_C, - SEL_R, - SEL_IMM, - ICMP_RC, - ICMP_R, - ICMP_CR, - ICMP_IMM, - FCMP_RR, - FCMP_RC, - FCMP_IMMR, - MUFU, // Multi-Function Operator - RRO_C, // Range Reduction Operator - RRO_R, - RRO_IMM, - F2F_C, - F2F_R, - F2F_IMM, - F2I_C, - F2I_R, - F2I_IMM, - I2F_C, - I2F_R, - I2F_IMM, - I2I_C, - I2I_R, - I2I_IMM, - LOP_C, - LOP_R, - LOP_IMM, - LOP32I, - LOP3_C, - LOP3_R, - LOP3_IMM, - MOV_C, - MOV_R, - MOV_IMM, - S2R, - MOV32_IMM, - SHL_C, - SHL_R, - SHL_IMM, - SHR_C, - SHR_R, - SHR_IMM, - SHF_RIGHT_R, - SHF_RIGHT_IMM, - SHF_LEFT_R, - SHF_LEFT_IMM, - FMNMX_C, - FMNMX_R, - FMNMX_IMM, - IMNMX_C, - IMNMX_R, - IMNMX_IMM, - FSETP_C, // Set Predicate - FSETP_R, - FSETP_IMM, - FSET_C, - FSET_R, - FSET_IMM, - ISETP_C, - ISETP_IMM, - ISETP_R, - ISET_R, - ISET_C, - ISET_IMM, - PSETP, - PSET, - CSETP, - R2P_IMM, - P2R_IMM, - XMAD_IMM, - XMAD_CR, - XMAD_RC, - XMAD_RR, - }; - - enum class Type { - Trivial, - Arithmetic, - ArithmeticImmediate, - ArithmeticInteger, - ArithmeticIntegerImmediate, - ArithmeticHalf, - ArithmeticHalfImmediate, - Bfe, - Bfi, - Shift, - Ffma, - Hfma2, - Flow, - Synch, - Warp, - Memory, - Texture, - Image, - FloatSet, - FloatSetPredicate, - IntegerSet, - IntegerSetPredicate, - HalfSet, - HalfSetPredicate, - PredicateSetPredicate, - PredicateSetRegister, - RegisterSetPredicate, - Conversion, - Video, - Xmad, - Unknown, - }; - - /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be - /// conditionally executed). - [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) { - // TODO(Subv): Add the rest of unpredicated instructions. - return opcode != Id::SSY && opcode != Id::PBK; - } - - class Matcher { - public: - constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) - : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} - - [[nodiscard]] constexpr const char* GetName() const { - return name; - } - - [[nodiscard]] constexpr u16 GetMask() const { - return mask; - } - - [[nodiscard]] constexpr Id GetId() const { - return id; - } - - [[nodiscard]] constexpr Type GetType() const { - return type; - } - - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - [[nodiscard]] constexpr bool Matches(u16 instruction) const { - return (instruction & mask) == expected; - } - - private: - const char* name; - u16 mask; - u16 expected; - Id id; - Type type; - }; - - using DecodeResult = std::optional>; - [[nodiscard]] static DecodeResult Decode(Instruction instr) { - static const auto table{GetDecodeTable()}; - - const auto matches_instruction = [instr](const auto& matcher) { - return matcher.Matches(static_cast(instr.opcode)); - }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); - return iter != table.end() ? std::optional>(*iter) - : std::nullopt; - } - -private: - struct Detail { - private: - static constexpr std::size_t opcode_bitsize = 16; - - /** - * Generates the mask and the expected value after masking from a given bitstring. - * A '0' in a bitstring indicates that a zero must be present at that bit position. - * A '1' in a bitstring indicates that a one must be present at that bit position. - */ - [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) { - u16 mask = 0, expect = 0; - for (std::size_t i = 0; i < opcode_bitsize; i++) { - const std::size_t bit_position = opcode_bitsize - i - 1; - switch (bitstring[i]) { - case '0': - mask |= static_cast(1U << bit_position); - break; - case '1': - expect |= static_cast(1U << bit_position); - mask |= static_cast(1U << bit_position); - break; - default: - // Ignore - break; - } - } - return std::make_pair(mask, expect); - } - - public: - /// Creates a matcher that can match and parse instructions based on bitstring. - [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op, - Type type, const char* const name) { - const auto [mask, expected] = GetMaskAndExpect(bitstring); - return Matcher(name, mask, expected, op, type); - } - }; - - [[nodiscard]] static std::vector GetDecodeTable() { - std::vector table = { -#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) - INST("111000110011----", Id::KIL, Type::Flow, "KIL"), - INST("111000101001----", Id::SSY, Type::Flow, "SSY"), - INST("111000101010----", Id::PBK, Type::Flow, "PBK"), - INST("111000100100----", Id::BRA, Type::Flow, "BRA"), - INST("111000100101----", Id::BRX, Type::Flow, "BRX"), - INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), - INST("111000110100----", Id::BRK, Type::Flow, "BRK"), - INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), - INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), - INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), - INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), - INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), - INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), - INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), - INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), - INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), - INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), - INST("100-------------", Id::LD, Type::Memory, "LD"), - INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), - INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), - INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), - INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), - INST("101-------------", Id::ST, Type::Memory, "ST"), - INST("1110111011011---", Id::STG, Type::Memory, "STG"), - INST("1110101111111---", Id::RED, Type::Memory, "RED"), - INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), - INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), - INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), - INST("110000----111---", Id::TEX, Type::Texture, "TEX"), - INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), - INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), - INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), - INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), - INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), - INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), - INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), - INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), - INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"), - INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), - INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), - INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), - INST("11011110001110--", Id::TXD, Type::Texture, "TXD"), - INST("11101011001-----", Id::SUST, Type::Image, "SUST"), - INST("11101011000-----", Id::SULD, Type::Image, "SULD"), - INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), - INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), - INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), - INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), - INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), - INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"), - INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), - INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), - INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), - INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), - INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), - INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), - INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), - INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"), - INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"), - INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"), - INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"), - INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"), - INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), - INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), - INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), - INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"), - INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"), - INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"), - INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"), - INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"), - INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"), - INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"), - INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"), - INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), - INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), - INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"), - INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"), - INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"), - INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"), - INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), - INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), - INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), - INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"), - INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"), - INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"), - INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"), - INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"), - INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"), - INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"), - INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"), - INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"), - INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), - INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), - INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), - INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), - INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), - INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"), - INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), - INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), - INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"), - INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), - INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), - INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), - INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), - INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), - INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), - INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), - INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), - INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), - INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), - INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), - INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), - INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), - INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), - INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), - INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), - INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"), - INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), - INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), - INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), - INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"), - INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"), - INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"), - INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), - INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), - INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), - INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), - INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), - INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), - INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), - INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), - INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"), - INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"), - INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"), - INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), - INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), - INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), - INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"), - INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), - INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), - INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), - INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), - INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), - INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), - INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), - INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"), - INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), - INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), - INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), - INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"), - INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"), - INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), - INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"), - INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"), - INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"), - INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), - INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), - INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), - INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), - INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), - INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), - INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), - INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"), - INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"), - INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"), - INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"), - INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"), - INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"), - INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), - INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), - INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), - INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"), - INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), - INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), - INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), - INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), - INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"), - INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"), - INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), - INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), - INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), - INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), - }; -#undef INST - std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { - // If a matcher has more bits in its mask it is more specific, so it - // should come first. - return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count(); - }); - - return table; - } -}; - -} // namespace Tegra::Shader diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h deleted file mode 100644 index e0d7b89c5..000000000 --- a/src/video_core/engines/shader_header.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/bit_field.h" -#include "common/common_funcs.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -enum class OutputTopology : u32 { - PointList = 1, - LineStrip = 6, - TriangleStrip = 7, -}; - -enum class PixelImap : u8 { - Unused = 0, - Constant = 1, - Perspective = 2, - ScreenLinear = 3, -}; - -// Documentation in: -// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html -struct Header { - union { - BitField<0, 5, u32> sph_type; - BitField<5, 5, u32> version; - BitField<10, 4, u32> shader_type; - BitField<14, 1, u32> mrt_enable; - BitField<15, 1, u32> kills_pixels; - BitField<16, 1, u32> does_global_store; - BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; - BitField<26, 1, u32> does_load_or_store; - BitField<27, 1, u32> does_fp64; - BitField<28, 4, u32> stream_out_mask; - } common0; - - union { - BitField<0, 24, u32> shader_local_memory_low_size; - BitField<24, 8, u32> per_patch_attribute_count; - } common1; - - union { - BitField<0, 24, u32> shader_local_memory_high_size; - BitField<24, 8, u32> threads_per_input_primitive; - } common2; - - union { - BitField<0, 24, u32> shader_local_memory_crs_size; - BitField<24, 4, OutputTopology> output_topology; - BitField<28, 4, u32> reserved; - } common3; - - union { - BitField<0, 12, u32> max_output_vertices; - BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. - BitField<20, 4, u32> reserved; - BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. - } common4; - - union { - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - union { - BitField<0, 8, u16> clip_distances; - BitField<8, 1, u16> point_sprite_s; - BitField<9, 1, u16> point_sprite_t; - BitField<10, 1, u16> fog_coordinate; - BitField<12, 1, u16> tessellation_eval_point_u; - BitField<13, 1, u16> tessellation_eval_point_v; - BitField<14, 1, u16> instance_id; - BitField<15, 1, u16> vertex_id; - }; - INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved - INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // OmapColor - INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - } vtg; - - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - - union { - BitField<0, 2, PixelImap> x; - BitField<2, 2, PixelImap> y; - BitField<4, 2, PixelImap> z; - BitField<6, 2, PixelImap> w; - u8 raw; - } imap_generic_vector[32]; - - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved - - struct { - u32 target; - union { - BitField<0, 1, u32> sample_mask; - BitField<1, 1, u32> depth; - BitField<2, 30, u32> reserved; - }; - } omap; - - bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { - const u32 bit = render_target * 4 + component; - return omap.target & (1 << bit); - } - - PixelImap GetPixelImap(u32 attribute) const { - const auto get_index = [this, attribute](u32 index) { - return static_cast( - (imap_generic_vector[attribute].raw >> (index * 2)) & 3); - }; - - std::optional result; - for (u32 component = 0; component < 4; ++component) { - const PixelImap index = get_index(component); - if (index == PixelImap::Unused) { - continue; - } - if (result && result != index) { - LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); - } - result = index; - } - return result.value_or(PixelImap::Unused); - } - } ps; - - std::array raw; - }; - - u64 GetLocalMemorySize() const { - return (common1.shader_local_memory_low_size | - (common2.shader_local_memory_high_size << 24)); - } -}; -static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); - -} // namespace Tegra::Shader diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 7a3660496..588ce6139 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -4,6 +4,9 @@ #include +#include + +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -13,9 +16,142 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +namespace { +vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { + boost::container::small_vector bindings; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + return device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); +} + +vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( + const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout) { + boost::container::small_vector entries; + size_t offset{}; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, + .pipelineLayout = pipeline_layout, + .set = 0, + }); +} +} // Anonymous namespace + +ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + const Shader::Info& info_, vk::ShaderModule spv_module_) + : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, + spv_module(std::move(spv_module_)), + descriptor_set_layout(CreateDescriptorSetLayout(device, info)), + descriptor_allocator(descriptor_pool, *descriptor_set_layout), + pipeline_layout{device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + })}, + descriptor_update_template{ + CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, + pipeline{device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + })} {} + +void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { + u32 enabled_uniforms{}; + for (const auto& desc : info.constant_buffer_descriptors) { + enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; + } + buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); -ComputePipeline::ComputePipeline() = default; + buffer_cache.UnbindComputeStorageBuffers(); + size_t index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true); + ++index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); +} -ComputePipeline::~ComputePipeline() = default; +VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + return descriptor_set; +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 433d8bb3d..dc045d524 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -5,19 +5,52 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKScheduler; -class VKUpdateDescriptorQueue; -class ComputePipeline { +class ComputePipeline : public Pipeline { public: - explicit ComputePipeline(); - ~ComputePipeline(); + explicit ComputePipeline() = default; + explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + const Shader::Info& info, vk::ShaderModule spv_module); + + ComputePipeline& operator=(ComputePipeline&&) noexcept = default; + ComputePipeline(ComputePipeline&&) noexcept = default; + + ComputePipeline& operator=(const ComputePipeline&) = delete; + ComputePipeline(const ComputePipeline&) = delete; + + void ConfigureBufferCache(BufferCache& buffer_cache); + + [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); + + [[nodiscard]] VkPipeline Handle() const noexcept { + return *pipeline; + } + + [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept { + return *pipeline_layout; + } + +private: + VKUpdateDescriptorQueue* update_descriptor_queue; + Shader::Info info; + + vk::ShaderModule spv_module; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ef9fb5910..3bea1ff44 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -19,9 +19,7 @@ constexpr std::size_t SETS_GROW_RATE = 0x20; DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, VkDescriptorSetLayout layout_) : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{descriptor_pool_}, layout{layout_} {} - -DescriptorAllocator::~DescriptorAllocator() = default; + descriptor_pool{&descriptor_pool_}, layout{layout_} {} VkDescriptorSet DescriptorAllocator::Commit() { const std::size_t index = CommitResource(); @@ -29,7 +27,7 @@ VkDescriptorSet DescriptorAllocator::Commit() { } void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); + descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin)); } VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index f892be7be..2501f9967 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -17,8 +17,12 @@ class VKScheduler; class DescriptorAllocator final : public ResourcePool { public: + explicit DescriptorAllocator() = default; explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); - ~DescriptorAllocator() override; + ~DescriptorAllocator() override = default; + + DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; + DescriptorAllocator(DescriptorAllocator&&) noexcept = default; DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; DescriptorAllocator(const DescriptorAllocator&) = delete; @@ -29,8 +33,8 @@ protected: void Allocate(std::size_t begin, std::size_t end) override; private: - VKDescriptorPool& descriptor_pool; - const VkDescriptorSetLayout layout; + VKDescriptorPool* descriptor_pool{}; + VkDescriptorSetLayout layout{}; std::vector descriptors_allocations; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h new file mode 100644 index 000000000..b06288403 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline.h @@ -0,0 +1,36 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Pipeline { +public: + /// Add a reference count to the pipeline + void AddRef() noexcept { + ++ref_count; + } + + [[nodiscard]] bool RemoveRef() noexcept { + --ref_count; + return ref_count == 0; + } + + [[nodiscard]] u64 UsageTick() const noexcept { + return usage_tick; + } + +protected: + u64 usage_tick{}; + +private: + size_t ref_count{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7d0ba1180..4bf3e4819 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,6 +12,8 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/recompiler.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -22,43 +24,105 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; namespace { -size_t StageFromProgram(size_t program) { - return program == 0 ? 0 : program - 1; -} +class Environment final : public Shader::Environment { +public: + explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + + ~Environment() override = default; + + [[nodiscard]] std::optional Analyze(u32 start_address) { + const std::optional size{TryFindSize(start_address)}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast(*size); + return Common::CityHash128(reinterpret_cast(code.data()), code.size()); + } -ShaderType StageFromProgram(Maxwell::ShaderProgram program) { - return static_cast(StageFromProgram(static_cast(program))); -} + [[nodiscard]] size_t ShaderSize() const noexcept { + return read_highest - read_lowest + INST_SIZE; + } -ShaderType GetShaderType(Maxwell::ShaderProgram program) { - switch (program) { - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - default: - UNIMPLEMENTED_MSG("program={}", program); - return ShaderType::Vertex; + [[nodiscard]] u128 ComputeHash() const { + const size_t size{ShaderSize()}; + auto data = std::make_unique(size); + gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash128(reinterpret_cast(data.get()), size); } -} + + u64 ReadInstruction(u32 address) override { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[address / INST_SIZE]; + } + return gpu_memory.Read(program_base + address); + } + + std::array WorkgroupSize() override { + const auto& qmd{kepler_compute.launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + static constexpr size_t INST_SIZE = sizeof(u64); + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + std::optional TryFindSize(u32 start_address) { + GPUVAddr guest_addr = program_base + start_address; + size_t offset = 0; + size_t size = BLOCK_SIZE; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { + const u64 inst = data[i / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + i; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; + } + + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + GPUVAddr program_base; + + u32 read_lowest = 0; + u32 read_highest = 0; + + std::vector code; + u32 cached_lowest = std::numeric_limits::max(); + u32 cached_highest = 0; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -70,35 +134,91 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader() = default; - -Shader::~Shader() = default; - PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, + : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} PipelineCache::~PipelineCache() = default; -ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); - auto& entry = pair->second; - if (!is_cache_miss) { - return *entry; + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + qmd.program_start}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + return nullptr; + } + ShaderInfo* const shader{TryGet(*cpu_shader_addr)}; + if (!shader) { + return CreateComputePipelineWithoutShader(*cpu_shader_addr); + } + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)}; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateComputePipeline(shader); + shader->compute_users.push_back(key); + return &pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + Environment env{kepler_compute, gpu_memory, program_base}; + if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { + // TODO: Load from cache } - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - throw "Bad"; + const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + shader_info->unique_hash = env.ComputeHash(); + shader_info->size_bytes = env.ShaderSize(); + return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, + BuildShader(device, code)}; } -void PipelineCache::OnShaderRemoval(Shader*) {} +ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) { + ShaderInfo shader; + ComputePipeline pipeline{CreateComputePipeline(&shader)}; + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; + shader.compute_users.push_back(key); + pipeline.AddRef(); + + const size_t size_bytes{shader.size_bytes}; + Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); + return &compute_cache.emplace(key, std::move(pipeline)).first->second; +} + +ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const { + const auto& qmd{kepler_compute.launch_description}; + return { + .unique_hash = unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; +} + +void PipelineCache::OnShaderRemoval(ShaderInfo* shader) { + for (const ComputePipelineCacheKey& key : shader->compute_users) { + const auto it = compute_cache.find(key); + ASSERT(it != compute_cache.end()); + + Pipeline& pipeline = it->second; + if (pipeline.RemoveRef()) { + // Wait for the pipeline to be free of GPU usage before destroying it + scheduler.Wait(pipeline.UsageTick()); + compute_cache.erase(it); + } + } +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e3e63340d..eb35abc27 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -36,7 +36,7 @@ class VKUpdateDescriptorQueue; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - GPUVAddr shader; + u128 unique_hash; u32 shared_memory_size; std::array workgroup_size; @@ -67,13 +67,13 @@ struct hash { namespace Vulkan { -class Shader { -public: - explicit Shader(); - ~Shader(); +struct ShaderInfo { + u128 unique_hash{}; + size_t size_bytes{}; + std::vector compute_users; }; -class PipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache final : public VideoCommon::ShaderCache { public: explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, @@ -83,12 +83,18 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~PipelineCache() override; - ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); protected: - void OnShaderRemoval(Shader* shader) final; + void OnShaderRemoval(ShaderInfo* shader) override; private: + ComputePipeline CreateComputePipeline(ShaderInfo* shader); + + ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); + + ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const; + Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -99,13 +105,7 @@ private: VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; - std::unique_ptr null_shader; - std::unique_ptr null_kernel; - - std::array last_shaders{}; - - std::mutex pipeline_cache; - std::unordered_map> compute_cache; + std::unordered_map compute_cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f152297d9..b757454c4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,6 +36,8 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -237,7 +239,26 @@ void RasterizerVulkan::Clear() { } void RasterizerVulkan::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; + if (!pipeline) { + return; + } + std::scoped_lock lock{buffer_cache.mutex}; + update_descriptor_queue.Acquire(); + pipeline->ConfigureBufferCache(buffer_cache); + const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; + + const auto& qmd{kepler_compute.launch_description}; + const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; + const VkPipeline pipeline_handle{pipeline->Handle()}; + const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()}; + scheduler.Record( + [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, + descriptor_set, nullptr); + cmdbuf.Dispatch(dim[0], dim[1], dim[2]); + }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 31017dc2b..3fd03b915 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -21,7 +21,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -150,8 +149,6 @@ private: BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; - GraphicsPipelineCacheKey graphics_key; - TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index a8bf7bda8..2dd514968 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -10,18 +10,16 @@ namespace Vulkan { ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) - : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} - -ResourcePool::~ResourcePool() = default; + : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {} size_t ResourcePool::CommitResource() { // Refresh semaphore to query updated results - master_semaphore.Refresh(); - const u64 gpu_tick = master_semaphore.KnownGpuTick(); + master_semaphore->Refresh(); + const u64 gpu_tick = master_semaphore->KnownGpuTick(); const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional { for (size_t iterator = begin; iterator < end; ++iterator) { if (gpu_tick >= ticks[iterator]) { - ticks[iterator] = master_semaphore.CurrentTick(); + ticks[iterator] = master_semaphore->CurrentTick(); return iterator; } } @@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() { // Both searches failed, the pool is full; handle it. const size_t free_resource = ManageOverflow(); - ticks[free_resource] = master_semaphore.CurrentTick(); + ticks[free_resource] = master_semaphore->CurrentTick(); found = free_resource; } } diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 9d0bb3b4d..f0b80ad59 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -18,8 +18,16 @@ class MasterSemaphore; */ class ResourcePool { public: + explicit ResourcePool() = default; explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); - virtual ~ResourcePool(); + + virtual ~ResourcePool() = default; + + ResourcePool& operator=(ResourcePool&&) noexcept = default; + ResourcePool(ResourcePool&&) noexcept = default; + + ResourcePool& operator=(const ResourcePool&) = default; + ResourcePool(const ResourcePool&) = default; protected: size_t CommitResource(); @@ -34,7 +42,7 @@ private: /// Allocates a new page of resources. void Grow(); - MasterSemaphore& master_semaphore; + MasterSemaphore* master_semaphore{}; size_t grow_step = 0; ///< Number of new resources created after an overflow size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found std::vector ticks; ///< Ticks for each resource -- cgit v1.2.3 From 6db69990da9f232e6d982cdcb69c2e27d93075cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Feb 2021 18:10:18 -0300 Subject: spirv: Add lower fp16 to fp32 pass --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 29 ++++-- src/shader_recompiler/backend/spirv/emit_context.h | 6 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 + src/shader_recompiler/backend/spirv/emit_spirv.h | 103 +++++++++++---------- .../spirv/emit_spirv_bitwise_conversion.cpp | 28 ++++-- .../backend/spirv/emit_spirv_composite.cpp | 48 +++++----- .../backend/spirv/emit_spirv_control_flow.cpp | 2 +- .../backend/spirv/emit_spirv_convert.cpp | 89 ++++++++++++++++++ .../backend/spirv/emit_spirv_floating_point.cpp | 48 +++++----- .../backend/spirv/emit_spirv_integer.cpp | 16 ---- .../backend/spirv/emit_spirv_logical.cpp | 72 +------------- .../backend/spirv/emit_spirv_memory.cpp | 22 +++-- src/shader_recompiler/frontend/ir/condition.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 70 +++++++------- .../frontend/ir/microinstruction.cpp | 4 + .../frontend/ir/microinstruction.h | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 6 +- src/shader_recompiler/frontend/ir/program.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 + .../impl/floating_point_conversion_integer.cpp | 62 ++++++++++--- .../frontend/maxwell/translate/impl/impl.h | 2 +- .../ir_opt/collect_shader_info_pass.cpp | 18 +++- .../ir_opt/constant_propagation_pass.cpp | 12 +-- .../ir_opt/lower_fp16_to_fp32.cpp | 79 ++++++++++++++++ src/shader_recompiler/ir_opt/passes.h | 1 + src/shader_recompiler/main.cpp | 10 +- src/shader_recompiler/object_pool.h | 2 +- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 3 + .../renderer_vulkan/vk_pipeline_cache.cpp | 8 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 - src/video_core/vulkan_common/vulkan_device.cpp | 10 +- 32 files changed, 479 insertions(+), 285 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp create mode 100644 src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b56bdd3d9..6047f3ebe 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_composite.cpp backend/spirv/emit_spirv_context_get_set.cpp backend/spirv/emit_spirv_control_flow.cpp + backend/spirv/emit_spirv_convert.cpp backend/spirv/emit_spirv_floating_point.cpp backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp @@ -82,6 +83,7 @@ add_library(shader_recompiler STATIC ir_opt/dead_code_elimination_pass.cpp ir_opt/global_memory_to_storage_buffer_pass.cpp ir_opt/identity_removal_pass.cpp + ir_opt/lower_fp16_to_fp32.cpp ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp ir_opt/verification_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 770067d98..ea1c8a3be 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -30,8 +30,11 @@ EmitContext::EmitContext(IR::Program& program) : Sirit::Module(0x00010000) { DefineCommonTypes(program.info); DefineCommonConstants(); DefineSpecialVariables(program.info); - DefineConstantBuffers(program.info); - DefineStorageBuffers(program.info); + + u32 binding{}; + DefineConstantBuffers(program.info, binding); + DefineStorageBuffers(program.info, binding); + DefineLabels(program); } @@ -58,6 +61,12 @@ void EmitContext::DefineCommonTypes(const Info& info) { U1 = Name(TypeBool(), "u1"); + // TODO: Conditionally define these + AddCapability(spv::Capability::Int16); + AddCapability(spv::Capability::Int64); + U16 = Name(TypeInt(16, false), "u16"); + U64 = Name(TypeInt(64, false), "u64"); + F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); @@ -95,12 +104,12 @@ void EmitContext::DefineSpecialVariables(const Info& info) { } } -void EmitContext::DefineConstantBuffers(const Info& info) { +void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; } const Id array_type{TypeArray(U32[1], Constant(U32[1], 4096))}; - Decorate(array_type, spv::Decoration::ArrayStride, 16U); + Decorate(array_type, spv::Decoration::ArrayStride, 4U); const Id struct_type{TypeStruct(array_type)}; Name(struct_type, "cbuf_block"); @@ -111,18 +120,19 @@ void EmitContext::DefineConstantBuffers(const Info& info) { const Id uniform_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); - u32 binding{}; + u32 index{}; for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("c{}", desc.index)); std::fill_n(cbufs.data() + desc.index, desc.count, id); + index += desc.count; binding += desc.count; } } -void EmitContext::DefineStorageBuffers(const Info& info) { +void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { if (info.storage_buffers_descriptors.empty()) { return; } @@ -140,13 +150,14 @@ void EmitContext::DefineStorageBuffers(const Info& info) { const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); - u32 binding{}; + u32 index{}; for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("ssbo{}", binding)); - std::fill_n(ssbos.data() + binding, desc.count, id); + Name(id, fmt::format("ssbo{}", index)); + std::fill_n(ssbos.data() + index, desc.count, id); + index += desc.count; binding += desc.count; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index c4b84759d..8de203da2 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -37,6 +37,8 @@ public: Id void_id{}; Id U1{}; + Id U16{}; + Id U64{}; VectorTypes F32; VectorTypes U32; VectorTypes F16; @@ -59,8 +61,8 @@ private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineSpecialVariables(const Info& info); - void DefineConstantBuffers(const Info& info); - void DefineStorageBuffers(const Info& info); + void DefineConstantBuffers(const Info& info, u32& binding); + void DefineStorageBuffers(const Info& info, u32& binding); void DefineLabels(IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index d59718435..4ce07c281 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -14,6 +14,8 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" +#pragma optimize("", off) + namespace Shader::Backend::SPIRV { namespace { template diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 5813f51ff..2b59c0b72 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -79,26 +79,27 @@ void EmitWriteStorageU16(EmitContext& ctx); void EmitWriteStorageS16(EmitContext& ctx); void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); -void EmitWriteStorage64(EmitContext& ctx); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); void EmitWriteStorage128(EmitContext& ctx); -void EmitCompositeConstructU32x2(EmitContext& ctx); -void EmitCompositeConstructU32x3(EmitContext& ctx); -void EmitCompositeConstructU32x4(EmitContext& ctx); -void EmitCompositeExtractU32x2(EmitContext& ctx); -Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); -void EmitCompositeExtractU32x4(EmitContext& ctx); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); void EmitCompositeConstructF16x2(EmitContext& ctx); void EmitCompositeConstructF16x3(EmitContext& ctx); void EmitCompositeConstructF16x4(EmitContext& ctx); -void EmitCompositeExtractF16x2(EmitContext& ctx); -void EmitCompositeExtractF16x3(EmitContext& ctx); -void EmitCompositeExtractF16x4(EmitContext& ctx); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); void EmitCompositeConstructF32x2(EmitContext& ctx); void EmitCompositeConstructF32x3(EmitContext& ctx); void EmitCompositeConstructF32x4(EmitContext& ctx); -void EmitCompositeExtractF32x2(EmitContext& ctx); -void EmitCompositeExtractF32x3(EmitContext& ctx); -void EmitCompositeExtractF32x4(EmitContext& ctx); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); @@ -116,11 +117,13 @@ void EmitBitCastF16U16(EmitContext& ctx); Id EmitBitCastF32U32(EmitContext& ctx, Id value); void EmitBitCastF64U64(EmitContext& ctx); void EmitPackUint2x32(EmitContext& ctx); -void EmitUnpackUint2x32(EmitContext& ctx); -void EmitPackFloat2x16(EmitContext& ctx); -void EmitUnpackFloat2x16(EmitContext& ctx); -void EmitPackDouble2x32(EmitContext& ctx); -void EmitUnpackDouble2x32(EmitContext& ctx); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value); +Id EmitPackFloat2x16(EmitContext& ctx, Id value); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); +Id EmitPackHalf2x16(EmitContext& ctx, Id value); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); @@ -159,18 +162,18 @@ void EmitFPLog2(EmitContext& ctx); void EmitFPSaturate16(EmitContext& ctx); void EmitFPSaturate32(EmitContext& ctx); void EmitFPSaturate64(EmitContext& ctx); -void EmitFPRoundEven16(EmitContext& ctx); -void EmitFPRoundEven32(EmitContext& ctx); -void EmitFPRoundEven64(EmitContext& ctx); -void EmitFPFloor16(EmitContext& ctx); -void EmitFPFloor32(EmitContext& ctx); -void EmitFPFloor64(EmitContext& ctx); -void EmitFPCeil16(EmitContext& ctx); -void EmitFPCeil32(EmitContext& ctx); -void EmitFPCeil64(EmitContext& ctx); -void EmitFPTrunc16(EmitContext& ctx); -void EmitFPTrunc32(EmitContext& ctx); -void EmitFPTrunc64(EmitContext& ctx); +Id EmitFPRoundEven16(EmitContext& ctx, Id value); +Id EmitFPRoundEven32(EmitContext& ctx, Id value); +Id EmitFPRoundEven64(EmitContext& ctx, Id value); +Id EmitFPFloor16(EmitContext& ctx, Id value); +Id EmitFPFloor32(EmitContext& ctx, Id value); +Id EmitFPFloor64(EmitContext& ctx, Id value); +Id EmitFPCeil16(EmitContext& ctx, Id value); +Id EmitFPCeil32(EmitContext& ctx, Id value); +Id EmitFPCeil64(EmitContext& ctx, Id value); +Id EmitFPTrunc16(EmitContext& ctx, Id value); +Id EmitFPTrunc32(EmitContext& ctx, Id value); +Id EmitFPTrunc64(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); void EmitIAdd64(EmitContext& ctx); Id EmitISub32(EmitContext& ctx, Id a, Id b); @@ -201,25 +204,25 @@ void EmitLogicalOr(EmitContext& ctx); void EmitLogicalAnd(EmitContext& ctx); void EmitLogicalXor(EmitContext& ctx); void EmitLogicalNot(EmitContext& ctx); -void EmitConvertS16F16(EmitContext& ctx); -void EmitConvertS16F32(EmitContext& ctx); -void EmitConvertS16F64(EmitContext& ctx); -void EmitConvertS32F16(EmitContext& ctx); -void EmitConvertS32F32(EmitContext& ctx); -void EmitConvertS32F64(EmitContext& ctx); -void EmitConvertS64F16(EmitContext& ctx); -void EmitConvertS64F32(EmitContext& ctx); -void EmitConvertS64F64(EmitContext& ctx); -void EmitConvertU16F16(EmitContext& ctx); -void EmitConvertU16F32(EmitContext& ctx); -void EmitConvertU16F64(EmitContext& ctx); -void EmitConvertU32F16(EmitContext& ctx); -void EmitConvertU32F32(EmitContext& ctx); -void EmitConvertU32F64(EmitContext& ctx); -void EmitConvertU64F16(EmitContext& ctx); -void EmitConvertU64F32(EmitContext& ctx); -void EmitConvertU64F64(EmitContext& ctx); -void EmitConvertU64U32(EmitContext& ctx); -void EmitConvertU32U64(EmitContext& ctx); +Id EmitConvertS16F16(EmitContext& ctx, Id value); +Id EmitConvertS16F32(EmitContext& ctx, Id value); +Id EmitConvertS16F64(EmitContext& ctx, Id value); +Id EmitConvertS32F16(EmitContext& ctx, Id value); +Id EmitConvertS32F32(EmitContext& ctx, Id value); +Id EmitConvertS32F64(EmitContext& ctx, Id value); +Id EmitConvertS64F16(EmitContext& ctx, Id value); +Id EmitConvertS64F32(EmitContext& ctx, Id value); +Id EmitConvertS64F64(EmitContext& ctx, Id value); +Id EmitConvertU16F16(EmitContext& ctx, Id value); +Id EmitConvertU16F32(EmitContext& ctx, Id value); +Id EmitConvertU16F64(EmitContext& ctx, Id value); +Id EmitConvertU32F16(EmitContext& ctx, Id value); +Id EmitConvertU32F32(EmitContext& ctx, Id value); +Id EmitConvertU32F64(EmitContext& ctx, Id value); +Id EmitConvertU64F16(EmitContext& ctx, Id value); +Id EmitConvertU64F32(EmitContext& ctx, Id value); +Id EmitConvertU64F64(EmitContext& ctx, Id value); +Id EmitConvertU64U32(EmitContext& ctx, Id value); +Id EmitConvertU32U64(EmitContext& ctx, Id value); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 49c200498..e0d1ba413 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -34,24 +34,32 @@ void EmitPackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitUnpackUint2x32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); } -void EmitPackFloat2x16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitPackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[1], value); } -void EmitUnpackFloat2x16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F16[2], value); } -void EmitPackDouble2x32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitPackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpPackHalf2x16(ctx.U32[1], value); } -void EmitUnpackDouble2x32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpUnpackHalf2x16(ctx.F32[2], value); +} + +Id EmitPackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F64[1], value); +} + +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 348e4796d..c950854a0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,28 +6,28 @@ namespace Shader::Backend::SPIRV { -void EmitCompositeConstructU32x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); } -void EmitCompositeConstructU32x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); } -void EmitCompositeConstructU32x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); } -void EmitCompositeExtractU32x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); } -Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { - return ctx.OpCompositeExtract(ctx.U32[1], vector, index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); } -void EmitCompositeExtractU32x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); } void EmitCompositeConstructF16x2(EmitContext&) { @@ -42,16 +42,16 @@ void EmitCompositeConstructF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitCompositeExtractF16x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); } -void EmitCompositeExtractF16x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); } -void EmitCompositeExtractF16x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); } void EmitCompositeConstructF32x2(EmitContext&) { @@ -66,16 +66,16 @@ void EmitCompositeConstructF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitCompositeExtractF32x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); } -void EmitCompositeExtractF32x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); } -void EmitCompositeExtractF32x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); } void EmitCompositeConstructF64x2(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 6c4199664..48755b827 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -11,7 +11,7 @@ void EmitBranch(EmitContext& ctx, IR::Block* label) { } void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label) { + IR::Block* false_label) { ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp new file mode 100644 index 000000000..76ccaffce --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -0,0 +1,89 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +Id EmitConvertS16F16(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS16F32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS16F64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertU16F16(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU16F32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU16F64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64U32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U64, value); +} + +Id EmitConvertU32U64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index d24fbb353..9ef180531 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -169,52 +169,52 @@ void EmitFPSaturate64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitFPRoundEven16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRoundEven16(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F16[1], value); } -void EmitFPRoundEven32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRoundEven32(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F32[1], value); } -void EmitFPRoundEven64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRoundEven64(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F64[1], value); } -void EmitFPFloor16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPFloor16(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F16[1], value); } -void EmitFPFloor32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPFloor32(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F32[1], value); } -void EmitFPFloor64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPFloor64(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F64[1], value); } -void EmitFPCeil16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPCeil16(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F16[1], value); } -void EmitFPCeil32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPCeil32(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F32[1], value); } -void EmitFPCeil64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPCeil64(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F64[1], value); } -void EmitFPTrunc16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPTrunc16(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F16[1], value); } -void EmitFPTrunc32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPTrunc32(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F32[1], value); } -void EmitFPTrunc64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPTrunc64(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F64[1], value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index a1d16b81e..22117a4ee 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -113,20 +113,4 @@ Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); } -void EmitLogicalOr(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitLogicalAnd(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitLogicalXor(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitLogicalNot(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index ff2f4fb74..c5a07252f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -6,83 +6,19 @@ namespace Shader::Backend::SPIRV { -void EmitConvertS16F16(EmitContext&) { +void EmitLogicalOr(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitConvertS16F32(EmitContext&) { +void EmitLogicalAnd(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitConvertS16F64(EmitContext&) { +void EmitLogicalXor(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitConvertS32F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS32F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS32F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS64F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS64F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS64F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU16F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU16F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU16F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64U32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32U64(EmitContext&) { +void EmitLogicalNot(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 77d698ffd..808c1b401 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -94,8 +94,7 @@ void EmitLoadStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset) { +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } @@ -129,8 +128,8 @@ void EmitWriteStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, Id value) { +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } @@ -140,8 +139,19 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ctx.OpStore(pointer, value); } -void EmitWriteStorage64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + // TODO: Support reinterpreting bindings, guaranteed to be aligned + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id low_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id high_index{ctx.OpIAdd(ctx.U32[1], low_index, ctx.Constant(ctx.U32[1], 1U))}; + const Id low_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, low_index)}; + const Id high_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, high_index)}; + ctx.OpStore(low_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); + ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); } void EmitWriteStorage128(EmitContext&) { diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 16b4ae888..51c2f15cf 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f42489d41..559ab9cca 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -547,11 +547,11 @@ F32 IREmitter::FPSqrt(const F32& value) { F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPSaturate16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPSaturate32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPSaturate64, value); default: ThrowInvalidType(value.Type()); @@ -560,11 +560,11 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPRoundEven16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPRoundEven32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPRoundEven64, value); default: ThrowInvalidType(value.Type()); @@ -573,11 +573,11 @@ F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPFloor16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPFloor32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPFloor64, value); default: ThrowInvalidType(value.Type()); @@ -586,11 +586,11 @@ F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPCeil16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPCeil32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPCeil64, value); default: ThrowInvalidType(value.Type()); @@ -599,11 +599,11 @@ F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPTrunc16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPTrunc32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPTrunc64, value); default: ThrowInvalidType(value.Type()); @@ -729,33 +729,33 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) { switch (bitsize) { case 16: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS16F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS16F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS16F64, value); default: ThrowInvalidType(value.Type()); } case 32: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS32F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS32F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS32F64, value); default: ThrowInvalidType(value.Type()); } case 64: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS64F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS64F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS64F64, value); default: ThrowInvalidType(value.Type()); @@ -769,33 +769,33 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { switch (bitsize) { case 16: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU16F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU16F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU16F64, value); default: ThrowInvalidType(value.Type()); } case 32: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU32F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU32F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU32F64, value); default: ThrowInvalidType(value.Type()); } case 64: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU64F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU64F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU64F64, value); default: ThrowInvalidType(value.Type()); @@ -829,10 +829,10 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { case 64: switch (value.Type()) { case Type::U32: + return Inst(Opcode::ConvertU64U32, value); + case Type::U64: // Nothing to do return value; - case Type::U64: - return Inst(Opcode::ConvertU64U32, value); default: break; } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index ee76db9ad..d6a9be87d 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -216,6 +216,10 @@ void Inst::ReplaceUsesWith(Value replacement) { } } +void Inst::ReplaceOpcode(IR::Opcode opcode) { + op = opcode; +} + void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 5b244fa0b..321393dd7 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -86,6 +86,8 @@ public: void ReplaceUsesWith(Value replacement); + void ReplaceOpcode(IR::Opcode opcode); + template requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) [[nodiscard]] FlagsType Flags() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index ede5e20c2..50da77535 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -119,8 +119,10 @@ OPCODE(PackUint2x32, U64, U32x OPCODE(UnpackUint2x32, U32x2, U64, ) OPCODE(PackFloat2x16, U32, F16x2, ) OPCODE(UnpackFloat2x16, F16x2, U32, ) -OPCODE(PackDouble2x32, U64, U32x2, ) -OPCODE(UnpackDouble2x32, U32x2, U64, ) +OPCODE(PackHalf2x16, U32, F32x2, ) +OPCODE(UnpackHalf2x16, F32x2, U32, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) // Pseudo-operation, handled specially at final emit OPCODE(GetZeroFromOp, U1, Opaque, ) diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 0ce99ef2a..8c301c3a1 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -35,4 +35,4 @@ std::string DumpProgram(const Program& program) { return ret; } -} // namespace Shader::IR \ No newline at end of file +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8c44ebb29..16cdc12e2 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,6 +56,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool dest_format; BitField<10, 2, SrcFormat> src_format; BitField<12, 1, u64> is_signed; - BitField<39, 1, Rounding> rounding; + BitField<39, 2, Rounding> rounding; BitField<49, 1, u64> half; BitField<44, 1, u64> ftz; BitField<45, 1, u64> abs; @@ -55,6 +55,28 @@ size_t BitSize(DestFormat dest_format) { } } +IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<20, 14, s64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4); + } + if (cbuf.offset % 2 != 0) { + throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4); + } + const IR::U32 binding{v.ir.Imm32(static_cast(cbuf.binding))}; + const IR::U32 byte_offset{v.ir.Imm32(static_cast(cbuf.offset) * 4 + 4)}; + const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)}; + const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)}; + return v.ir.PackDouble2x32(vector); +} + void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; @@ -82,19 +104,16 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { const size_t bitsize{BitSize(f2i.dest_format)}; const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)}; - v.X(f2i.dest_reg, result); + if (bitsize == 64) { + const IR::Value vector{v.ir.UnpackUint2x32(result)}; + v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)}); + v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)}); + } else { + v.X(f2i.dest_reg, result); + } if (f2i.cc != 0) { - v.SetZFlag(v.ir.GetZeroFromOp(result)); - if (is_signed) { - v.SetSFlag(v.ir.GetSignFromOp(result)); - } else { - v.ResetSFlag(); - } - v.ResetCFlag(); - - // TODO: Investigate if out of bound conversions sets the overflow flag - v.ResetOFlag(); + throw NotImplementedException("F2I CC"); } } } // Anonymous namespace @@ -118,12 +137,25 @@ void TranslatorVisitor::F2I_reg(u64 insn) { f2i.base.src_format.Value()); } }()}; - TranslateF2I(*this, insn, op_a); } -void TranslatorVisitor::F2I_cbuf(u64) { - throw NotImplementedException("{}", Opcode::F2I_cbuf); +void TranslatorVisitor::F2I_cbuf(u64 insn) { + const F2I f2i{insn}; + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { + switch (f2i.src_format) { + case SrcFormat::F16: + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; + case SrcFormat::F32: + return GetCbufF(insn); + case SrcFormat::F64: { + return UnpackCbuf(*this, insn); + } + default: + throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); + } + }()}; + TranslateF2I(*this, insn, op_a); } void TranslatorVisitor::F2I_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 8bd468244..27aba2cf8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -11,7 +11,7 @@ namespace Shader::Maxwell { class TranslatorVisitor { public: - explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_} ,ir(block) {} + explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} Environment& env; IR::IREmitter ir; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f2326dea1..f7f102f53 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -26,6 +26,22 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::CompositeConstructF16x2: + case IR::Opcode::CompositeConstructF16x3: + case IR::Opcode::CompositeConstructF16x4: + case IR::Opcode::CompositeExtractF16x2: + case IR::Opcode::CompositeExtractF16x3: + case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::PackFloat2x16: + case IR::Opcode::UnpackFloat2x16: + case IR::Opcode::ConvertS16F16: + case IR::Opcode::ConvertS32F16: + case IR::Opcode::ConvertS64F16: + case IR::Opcode::ConvertU16F16: + case IR::Opcode::ConvertU32F16: + case IR::Opcode::ConvertU64F16: case IR::Opcode::FPAbs16: case IR::Opcode::FPAdd16: case IR::Opcode::FPCeil16: @@ -36,7 +52,7 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven16: case IR::Opcode::FPSaturate16: case IR::Opcode::FPTrunc16: - info.uses_fp16; + info.uses_fp16 = true; break; case IR::Opcode::FPAbs64: case IR::Opcode::FPAdd64: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 9eb61b54c..4d4e88259 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -104,12 +104,12 @@ void FoldGetPred(IR::Inst& inst) { bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { /* * We are looking for this pattern: - * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 (uses: 1) - * %rhs_mul = IMul32 %rhs_bfe, %factor_b (uses: 1) - * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 (uses: 1) - * %rhs_mul = IMul32 %lhs_bfe, %factor_b (uses: 1) - * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 (uses: 1) - * %result = IAdd32 %lhs_shl, %rhs_mul (uses: 10) + * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 + * %rhs_mul = IMul32 %rhs_bfe, %factor_b + * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 + * %rhs_mul = IMul32 %lhs_bfe, %factor_b + * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 + * %result = IAdd32 %lhs_shl, %rhs_mul * * And replacing it with * %result = IMul32 %factor_a, %factor_b diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp new file mode 100644 index 000000000..c7032f168 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -0,0 +1,79 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +IR::Opcode Replace(IR::Opcode op) { + switch (op) { + case IR::Opcode::FPAbs16: + return IR::Opcode::FPAbs32; + case IR::Opcode::FPAdd16: + return IR::Opcode::FPAdd32; + case IR::Opcode::FPCeil16: + return IR::Opcode::FPCeil32; + case IR::Opcode::FPFloor16: + return IR::Opcode::FPFloor32; + case IR::Opcode::FPFma16: + return IR::Opcode::FPFma32; + case IR::Opcode::FPMul16: + return IR::Opcode::FPMul32; + case IR::Opcode::FPNeg16: + return IR::Opcode::FPNeg32; + case IR::Opcode::FPRoundEven16: + return IR::Opcode::FPRoundEven32; + case IR::Opcode::FPSaturate16: + return IR::Opcode::FPSaturate32; + case IR::Opcode::FPTrunc16: + return IR::Opcode::FPTrunc32; + case IR::Opcode::CompositeConstructF16x2: + return IR::Opcode::CompositeConstructF32x2; + case IR::Opcode::CompositeConstructF16x3: + return IR::Opcode::CompositeConstructF32x3; + case IR::Opcode::CompositeConstructF16x4: + return IR::Opcode::CompositeConstructF32x4; + case IR::Opcode::CompositeExtractF16x2: + return IR::Opcode::CompositeExtractF32x2; + case IR::Opcode::CompositeExtractF16x3: + return IR::Opcode::CompositeExtractF32x3; + case IR::Opcode::CompositeExtractF16x4: + return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::ConvertS16F16: + return IR::Opcode::ConvertS16F32; + case IR::Opcode::ConvertS32F16: + return IR::Opcode::ConvertS32F32; + case IR::Opcode::ConvertS64F16: + return IR::Opcode::ConvertS64F32; + case IR::Opcode::ConvertU16F16: + return IR::Opcode::ConvertU16F32; + case IR::Opcode::ConvertU32F16: + return IR::Opcode::ConvertU32F32; + case IR::Opcode::ConvertU64F16: + return IR::Opcode::ConvertU64F32; + case IR::Opcode::PackFloat2x16: + return IR::Opcode::PackHalf2x16; + case IR::Opcode::UnpackFloat2x16: + return IR::Opcode::UnpackHalf2x16; + default: + return op; + } +} +} // Anonymous namespace + +void LowerFp16ToFp32(IR::Program& program) { + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.blocks) { + for (IR::Inst& inst : block->Instructions()) { + inst.ReplaceOpcode(Replace(inst.Opcode())); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 89e5811d3..38106308c 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -24,6 +24,7 @@ void ConstantPropagationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block); void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); +void LowerFp16ToFp32(IR::Program& program); void SsaRewritePass(std::span post_order_blocks); void VerificationPass(const IR::Function& function); diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 050a37f18..abd44e323 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -67,8 +67,8 @@ int main() { ObjectPool inst_pool; ObjectPool block_pool; - FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; - // FileEnvironment env{"D:\\Shaders\\shader.bin"}; + // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; + FileEnvironment env{"D:\\Shaders\\shader.bin"}; block_pool.ReleaseContents(); inst_pool.ReleaseContents(); flow_block_pool.ReleaseContents(); @@ -76,5 +76,9 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - void(Backend::SPIRV::EmitSPIRV(env, program)); + const std::vector spirv{Backend::SPIRV::EmitSPIRV(env, program)}; + std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; + std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); + std::fclose(file); + std::system("spirv-dis D:\\shader.spv"); } diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index f78813b5f..c10751b9d 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h @@ -18,7 +18,7 @@ public: } template - requires std::is_constructible_v [[nodiscard]] T* Create(Args&&... args) { + requires std::is_constructible_v[[nodiscard]] T* Create(Args&&... args) { return std::construct_at(Memory(), std::forward(args)...); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 4181d83ee..a444d55d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,6 +206,8 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); + /* + FIXME pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, @@ -224,6 +226,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); + */ } VKComputePass::~VKComputePass() = default; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4bf3e4819..c2a41a360 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -31,8 +31,6 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -#pragma optimize("", off) - namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); @@ -180,6 +178,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { // TODO: Load from cache } const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + std::system("spirv-dis D:\\shader.spv"); + shader_info->unique_hash = env.ComputeHash(); shader_info->size_bytes = env.ShaderSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b757454c4..1b662f9f3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,8 +36,6 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -#pragma optimize("", off) - namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f214510da..85f903125 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -247,9 +247,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderStorageImageArrayDynamicIndexing = false, .shaderClipDistance = false, .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, + .shaderFloat64 = true, + .shaderInt64 = true, + .shaderInt16 = true, .shaderResourceResidency = false, .shaderResourceMinLod = false, .sparseBinding = false, @@ -420,8 +420,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - is_float16_supported = false; + // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); + // is_float16_supported = false; } graphics_queue = logical.GetQueue(graphics_family); -- cgit v1.2.3 From e2bc05b17d91854cbb9c0ce3647141bf7d33143e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Feb 2021 03:30:13 -0300 Subject: shader: Add denorm flush support --- src/shader_recompiler/CMakeLists.txt | 5 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 63 +++++++++++++++++-- src/shader_recompiler/backend/spirv/emit_spirv.h | 4 +- .../backend/spirv/emit_spirv_floating_point.cpp | 6 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 32 +++++----- src/shader_recompiler/frontend/ir/ir_emitter.h | 8 +-- src/shader_recompiler/frontend/ir/modifiers.h | 23 ++++--- .../impl/floating_point_conversion_integer.cpp | 19 ++++-- .../ir_opt/collect_shader_info_pass.cpp | 71 ++++++++++++++++++++-- .../global_memory_to_storage_buffer_pass.cpp | 1 - src/shader_recompiler/main.cpp | 13 +++- src/shader_recompiler/profile.h | 9 ++- src/shader_recompiler/recompiler.cpp | 5 +- src/shader_recompiler/recompiler.h | 4 +- src/shader_recompiler/shader_info.h | 7 ++- .../renderer_vulkan/vk_compute_pipeline.cpp | 7 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 15 ++++- src/video_core/vulkan_common/vulkan_device.cpp | 26 +++++--- src/video_core/vulkan_common/vulkan_device.h | 33 +++++----- src/video_core/vulkan_common/vulkan_wrapper.cpp | 2 - 20 files changed, 260 insertions(+), 93 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6047f3ebe..fbd4ec6dc 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -32,6 +32,7 @@ add_library(shader_recompiler STATIC frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp frontend/ir/microinstruction.h + frontend/ir/modifiers.h frontend/ir/opcodes.cpp frontend/ir/opcodes.h frontend/ir/opcodes.inc @@ -94,9 +95,7 @@ add_library(shader_recompiler STATIC shader_info.h ) -target_include_directories(shader_recompiler PRIVATE sirit) -target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) -target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit) +target_link_libraries(shader_recompiler PUBLIC fmt::fmt sirit) add_executable(shader_util main.cpp) target_link_libraries(shader_util PRIVATE shader_recompiler) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 4ce07c281..2519e446a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -14,8 +14,6 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" -#pragma optimize("", off) - namespace Shader::Backend::SPIRV { namespace { template @@ -113,9 +111,61 @@ Id TypeId(const EmitContext& ctx, IR::Type type) { throw NotImplementedException("Phi node type {}", type); } } + +void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, + Id main_func) { + if (!profile.support_float_controls) { + return; + } + const Info& info{program.info}; + if (!info.uses_fp32_denorms_flush && !info.uses_fp32_denorms_preserve && + !info.uses_fp16_denorms_flush && !info.uses_fp16_denorms_preserve) { + return; + } + ctx.AddExtension("SPV_KHR_float_controls"); + + if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { + // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); + } else if (info.uses_fp32_denorms_flush) { + if (profile.support_fp32_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); + } else { + // Drivers will most likely flush denorms by default, no need to warn + } + } else if (info.uses_fp32_denorms_preserve) { + if (profile.support_fp32_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); + } else { + // LOG_WARNING(HW_GPU, "Fp32 denorm preserve used in shader without host support"); + } + } + if (!profile.support_separate_denorm_behavior) { + // No separate denorm behavior + return; + } + if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { + // LOG_ERROR(HW_GPU, "Fp16 denorm flush and preserve on the same shader"); + } else if (info.uses_fp16_denorms_flush) { + if (profile.support_fp16_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + } else { + // Same as fp32, no need to warn as most drivers will flush by default + } + } else if (info.uses_fp32_denorms_preserve) { + if (profile.support_fp16_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + } else { + // LOG_WARNING(HW_GPU, "Fp16 denorm preserve used in shader without host support"); + } + } +} } // Anonymous namespace -std::vector EmitSPIRV(Environment& env, IR::Program& program) { +std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { EmitContext ctx{program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) @@ -131,10 +181,11 @@ std::vector EmitSPIRV(Environment& env, IR::Program& program) { ctx.OpFunctionEnd(); } boost::container::small_vector interfaces; - if (program.info.uses_workgroup_id) { + const Info& info{program.info}; + if (info.uses_workgroup_id) { interfaces.push_back(ctx.workgroup_id); } - if (program.info.uses_local_invocation_id) { + if (info.uses_local_invocation_id) { interfaces.push_back(ctx.local_invocation_id); } const std::span interfaces_span(interfaces.data(), interfaces.size()); @@ -144,6 +195,8 @@ std::vector EmitSPIRV(Environment& env, IR::Program& program) { ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); + SetupDenormControl(profile, program, ctx, func); + return ctx.Assemble(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 2b59c0b72..de624a151 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -11,10 +11,12 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector EmitSPIRV(Environment& env, IR::Program& program); +[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, Environment& env, + IR::Program& program); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 9ef180531..c9687de37 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -13,7 +13,10 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { ctx.Decorate(op, spv::Decoration::NoContraction); } switch (flags.rounding) { + case IR::FpRounding::DontCare: + break; case IR::FpRounding::RN: + ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE); break; case IR::FpRounding::RM: ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); @@ -25,9 +28,6 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); break; } - if (flags.fmz_mode != IR::FmzMode::FTZ) { - throw NotImplementedException("Denorm management not implemented"); - } return op; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 559ab9cca..8f120a2f6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -558,53 +558,53 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { } } -F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { +F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPRoundEven16, value); + return Inst(Opcode::FPRoundEven16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPRoundEven32, value); + return Inst(Opcode::FPRoundEven32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPRoundEven64, value); + return Inst(Opcode::FPRoundEven64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { +F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPFloor16, value); + return Inst(Opcode::FPFloor16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPFloor32, value); + return Inst(Opcode::FPFloor32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPFloor64, value); + return Inst(Opcode::FPFloor64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { +F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPCeil16, value); + return Inst(Opcode::FPCeil16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPCeil32, value); + return Inst(Opcode::FPCeil32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPCeil64, value); + return Inst(Opcode::FPCeil64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { +F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPTrunc16, value); + return Inst(Opcode::FPTrunc16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPTrunc32, value); + return Inst(Opcode::FPTrunc32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPTrunc64, value); + return Inst(Opcode::FPTrunc64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 24b012a39..959f4f9da 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -129,10 +129,10 @@ public: [[nodiscard]] F32 FPSinNotReduced(const F32& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index c288eede0..44652eae7 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -4,25 +4,30 @@ #pragma once +#include "common/common_types.h" + namespace Shader::IR { enum class FmzMode : u8 { - None, // Denorms are not flushed, NAN is propagated (nouveau) - FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) - FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + DontCare, // Not specified for this instruction + FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) + FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + None, // Denorms are not flushed, NAN is propagated (nouveau) }; enum class FpRounding : u8 { - RN, // Round to nearest even, - RM, // Round towards negative infinity - RP, // Round towards positive infinity - RZ, // Round towards zero + DontCare, // Not specified for this instruction + RN, // Round to nearest even, + RM, // Round towards negative infinity + RP, // Round towards positive infinity + RZ, // Round towards zero }; struct FpControl { bool no_contraction{false}; - FpRounding rounding{FpRounding::RN}; - FmzMode fmz_mode{FmzMode::FTZ}; + FpRounding rounding{FpRounding::DontCare}; + FmzMode fmz_mode{FmzMode::DontCare}; }; static_assert(sizeof(FpControl) <= sizeof(u32)); + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index ae2d37405..4d82a0009 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -81,17 +81,28 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; + const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && + f2i.dest_format != DestFormat::I64}; + IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; + if (denorm_cares) { + fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; + } + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fmz_mode}, + }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { switch (f2i.rounding) { case Rounding::Round: - return v.ir.FPRoundEven(op_a); + return v.ir.FPRoundEven(op_a, fp_control); case Rounding::Floor: - return v.ir.FPFloor(op_a); + return v.ir.FPFloor(op_a, fp_control); case Rounding::Ceil: - return v.ir.FPCeil(op_a); + return v.ir.FPCeil(op_a, fp_control); case Rounding::Trunc: - return v.ir.FPTrunc(op_a); + return v.ir.FPTrunc(op_a, fp_control); default: throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f7f102f53..6662ef4cd 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -2,23 +2,28 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { namespace { -void AddConstantBufferDescriptor(Info& info, u32 index) { - auto& descriptor{info.constant_buffers.at(index)}; - if (descriptor) { +void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { + if (count != 1) { + throw NotImplementedException("Constant buffer descriptor indexing"); + } + if ((info.constant_buffer_mask & (1U << index)) != 0) { return; } - descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{ + info.constant_buffer_mask |= 1U << index; + info.constant_buffer_descriptors.push_back({ .index{index}, .count{1}, }); } -void Visit(Info& info, IR::Inst& inst) { +void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::WorkgroupId: info.uses_workgroup_id = true; @@ -72,7 +77,7 @@ void Visit(Info& info, IR::Inst& inst) { break; case IR::Opcode::GetCbuf: if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { - AddConstantBufferDescriptor(info, index.U32()); + AddConstantBufferDescriptor(info, index.U32(), 1); } else { throw NotImplementedException("Constant buffer with non-immediate index"); } @@ -81,6 +86,60 @@ void Visit(Info& info, IR::Inst& inst) { break; } } + +void VisitFpModifiers(Info& info, IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::FPAdd16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPTrunc16: { + const auto control{inst.Flags()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp16_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp16_denorms_preserve = true; + break; + } + break; + } + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + case IR::Opcode::FPTrunc32: { + const auto control{inst.Flags()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp32_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp32_denorms_preserve = true; + break; + } + break; + } + default: + break; + } +} + +void Visit(Info& info, IR::Inst& inst) { + VisitUsages(info, inst); + VisitFpModifiers(info, inst); +} } // Anonymous namespace void CollectShaderInfoPass(IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index bf230a850..03bd547b7 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -351,7 +351,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_offset{storage_buffer.offset}, .count{1}, }); - info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); ++storage_index; } for (const StorageInst& storage_inst : to_replace) { diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index abd44e323..72565f477 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -60,6 +60,17 @@ void RunDatabase() { fmt::print(stdout, "{} ms", duration_cast(t - t0).count() / double(N)); } +static constexpr Profile PROFILE{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = true, + .support_separate_rounding_mode = true, + .support_fp16_denorm_preserve = true, + .support_fp32_denorm_preserve = true, + .support_fp16_denorm_flush = true, + .support_fp32_denorm_flush = true, +}; + int main() { // RunDatabase(); @@ -76,7 +87,7 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - const std::vector spirv{Backend::SPIRV::EmitSPIRV(env, program)}; + const std::vector spirv{Backend::SPIRV::EmitSPIRV(PROFILE, env, program)}; std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); std::fclose(file); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c96d783b7..9881bebab 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -7,7 +7,14 @@ namespace Shader { struct Profile { - bool unified_descriptor_binding; + bool unified_descriptor_binding{}; + bool support_float_controls{}; + bool support_separate_denorm_behavior{}; + bool support_separate_rounding_mode{}; + bool support_fp16_denorm_preserve{}; + bool support_fp32_denorm_preserve{}; + bool support_fp16_denorm_flush{}; + bool support_fp32_denorm_flush{}; }; } // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index b25081e39..527e19c27 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -14,14 +14,15 @@ namespace Shader { -std::pair> RecompileSPIRV(Environment& env, u32 start_address) { +std::pair> RecompileSPIRV(const Profile& profile, Environment& env, + u32 start_address) { ObjectPool flow_block_pool; ObjectPool inst_pool; ObjectPool block_pool; Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; - return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; + return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)}; } } // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 4cb973878..2529463ae 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -9,10 +9,12 @@ #include "common/common_types.h" #include "shader_recompiler/environment.h" +#include "shader_recompiler/profile.h" #include "shader_recompiler/shader_info.h" namespace Shader { -[[nodiscard]] std::pair> RecompileSPIRV(Environment& env, u32 start_address); +[[nodiscard]] std::pair> RecompileSPIRV(const Profile& profile, + Environment& env, u32 start_address); } // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f49a79368..8766bf13e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -31,14 +31,15 @@ struct Info { bool uses_local_invocation_id{}; bool uses_fp16{}; bool uses_fp64{}; + bool uses_fp16_denorms_flush{}; + bool uses_fp16_denorms_preserve{}; + bool uses_fp32_denorms_flush{}; + bool uses_fp32_denorms_preserve{}; u32 constant_buffer_mask{}; - std::array constant_buffers{}; boost::container::static_vector constant_buffer_descriptors; - - std::array storage_buffers{}; boost::container::static_vector storage_buffers_descriptors; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 588ce6139..a658a3276 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -131,12 +131,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip })} {} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { - u32 enabled_uniforms{}; - for (const auto& desc : info.constant_buffer_descriptors) { - enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; - } - buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); - + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c2a41a360..49ff911d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -177,7 +177,20 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + const auto& float_control{device.FloatControlProperties()}; + const Shader::Profile profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + }; + const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 85f903125..4887d6fd9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -43,6 +43,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -200,6 +201,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); + SetupProperties(); const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(surface != nullptr); @@ -426,8 +428,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); - - use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); } Device::~Device() = default; @@ -600,7 +600,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - VkPhysicalDeviceFeatures2 features2{}; + VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; features2.pNext = &robustness2; @@ -684,7 +684,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { true); } } - VkPhysicalDeviceFeatures2KHR features; + VkPhysicalDeviceFeatures2KHR features{}; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; VkPhysicalDeviceProperties2KHR physical_properties; @@ -806,11 +806,21 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { } void Device::SetupFeatures() { - const auto supported_features{physical.GetFeatures()}; - is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; + const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); + is_optimal_astc_supported = IsOptimalAstcSupported(features); +} + +void Device::SetupProperties() { + float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2KHR properties2{}; + properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + properties2.pNext = &float_controls; + + physical.GetProperties2KHR(properties2); } void Device::CollectTelemetryParameters() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 96c0f8c60..82bccc8f0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -128,6 +128,11 @@ public: return properties.limits.maxComputeSharedMemorySize; } + /// Returns float control properties of the device. + const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { + return float_controls; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -223,11 +228,6 @@ public: return reported_extensions; } - /// Returns true if the setting for async shader compilation is enabled. - bool UseAsynchronousShaders() const { - return use_asynchronous_shaders; - } - u64 GetDeviceLocalMemory() const { return device_access_memory; } @@ -245,6 +245,9 @@ private: /// Sets up device features. void SetupFeatures(); + /// Sets up device properties. + void SetupProperties(); + /// Collects telemetry information from the device. void CollectTelemetryParameters(); @@ -267,14 +270,15 @@ private: bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan onstance version. u32 graphics_family{}; ///< Main graphics queue family index. u32 present_family{}; ///< Main present queue family index. VkDriverIdKHR driver_id{}; ///< Driver ID. @@ -301,9 +305,6 @@ private: bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - // Asynchronous Graphics Pipeline setting - bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline - // Telemetry parameters std::string vendor_name; ///< Device's driver name. std::vector reported_extensions; ///< Reported Vulkan extensions. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2aa0ffbe6..33fb74bfb 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -311,8 +311,6 @@ const char* ToString(VkResult result) noexcept { return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; case VkResult::VK_ERROR_UNKNOWN: return "VK_ERROR_UNKNOWN"; - case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: - return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; case VkResult::VK_THREAD_IDLE_KHR: return "VK_THREAD_IDLE_KHR"; case VkResult::VK_THREAD_DONE_KHR: -- cgit v1.2.3 From 704c6f353f68745168902c6c66c04bb730bd30e6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 17:50:14 -0300 Subject: shader: Rename, implement FADD.SAT and P2R (imm) --- src/shader_recompiler/CMakeLists.txt | 3 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 40 ++++++------ .../backend/spirv/emit_spirv_floating_point.cpp | 58 +++++++---------- .../backend/spirv/emit_spirv_integer.cpp | 75 +++++++++++++++------- .../backend/spirv/emit_spirv_select.cpp | 4 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 16 ++--- src/shader_recompiler/frontend/ir/pred.h | 4 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../maxwell/translate/impl/floating_point_add.cpp | 20 +++--- .../impl/floating_point_conversion_integer.cpp | 2 +- .../impl/floating_point_fused_multiply_add.cpp | 4 +- .../translate/impl/floating_point_multiply.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 17 ++++- .../frontend/maxwell/translate/impl/impl.h | 7 +- .../maxwell/translate/impl/integer_add.cpp | 4 +- .../translate/impl/move_predicate_to_register.cpp | 66 +++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ---- .../renderer_vulkan/vk_pipeline_cache.cpp | 4 +- 18 files changed, 213 insertions(+), 127 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index fbd4ec6dc..802527255 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -74,9 +74,10 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_memory.cpp - frontend/maxwell/translate/impl/not_implemented.cpp + frontend/maxwell/translate/impl/move_predicate_to_register.cpp frontend/maxwell/translate/impl/move_register.cpp frontend/maxwell/translate/impl/move_special_register.cpp + frontend/maxwell/translate/impl/not_implemented.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index de624a151..922e294a7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -110,7 +110,7 @@ void EmitCompositeExtractF64x3(EmitContext& ctx); void EmitCompositeExtractF64x4(EmitContext& ctx); void EmitSelect8(EmitContext& ctx); void EmitSelect16(EmitContext& ctx); -void EmitSelect32(EmitContext& ctx); +Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value); void EmitSelect64(EmitContext& ctx); void EmitBitCastU16F16(EmitContext& ctx); Id EmitBitCastU32F32(EmitContext& ctx, Id value); @@ -130,9 +130,9 @@ void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); -void EmitFPAbs16(EmitContext& ctx); -void EmitFPAbs32(EmitContext& ctx); -void EmitFPAbs64(EmitContext& ctx); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value); Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); @@ -146,9 +146,9 @@ void EmitFPMin64(EmitContext& ctx); Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -void EmitFPNeg16(EmitContext& ctx); -void EmitFPNeg32(EmitContext& ctx); -void EmitFPNeg64(EmitContext& ctx); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value); void EmitFPRecip32(EmitContext& ctx); void EmitFPRecip64(EmitContext& ctx); void EmitFPRecipSqrt32(EmitContext& ctx); @@ -161,9 +161,9 @@ void EmitFPExp2NotReduced(EmitContext& ctx); void EmitFPCos(EmitContext& ctx); void EmitFPCosNotReduced(EmitContext& ctx); void EmitFPLog2(EmitContext& ctx); -void EmitFPSaturate16(EmitContext& ctx); -void EmitFPSaturate32(EmitContext& ctx); -void EmitFPSaturate64(EmitContext& ctx); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value); Id EmitFPRoundEven16(EmitContext& ctx, Id value); Id EmitFPRoundEven32(EmitContext& ctx, Id value); Id EmitFPRoundEven64(EmitContext& ctx, Id value); @@ -186,21 +186,21 @@ void EmitIAbs32(EmitContext& ctx); Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); void EmitShiftRightLogical32(EmitContext& ctx); void EmitShiftRightArithmetic32(EmitContext& ctx); -void EmitBitwiseAnd32(EmitContext& ctx); -void EmitBitwiseOr32(EmitContext& ctx); -void EmitBitwiseXor32(EmitContext& ctx); +Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b); +Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b); void EmitBitFieldInsert(EmitContext& ctx); void EmitBitFieldSExtract(EmitContext& ctx); Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); -void EmitULessThan(EmitContext& ctx); -void EmitIEqual(EmitContext& ctx); -void EmitSLessThanEqual(EmitContext& ctx); -void EmitULessThanEqual(EmitContext& ctx); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -void EmitUGreaterThan(EmitContext& ctx); -void EmitINotEqual(EmitContext& ctx); -void EmitSGreaterThanEqual(EmitContext& ctx); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); void EmitLogicalOr(EmitContext& ctx); void EmitLogicalAnd(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index c9687de37..47f87054b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -12,37 +12,21 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { if (flags.no_contraction) { ctx.Decorate(op, spv::Decoration::NoContraction); } - switch (flags.rounding) { - case IR::FpRounding::DontCare: - break; - case IR::FpRounding::RN: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE); - break; - case IR::FpRounding::RM: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); - break; - case IR::FpRounding::RP: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTP); - break; - case IR::FpRounding::RZ: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); - break; - } return op; } } // Anonymous namespace -void EmitFPAbs16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs16(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F16[1], value); } -void EmitFPAbs32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs32(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F32[1], value); } -void EmitFPAbs64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs64(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F64[1], value); } Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { @@ -97,16 +81,16 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } -void EmitFPNeg16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg16(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F16[1], value); } -void EmitFPNeg32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg32(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F32[1], value); } -void EmitFPNeg64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg64(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F64[1], value); } void EmitFPRecip32(EmitContext&) { @@ -157,16 +141,22 @@ void EmitFPLog2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitFPSaturate16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate16(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; + const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; + return ctx.OpFClamp(ctx.F32[1], value, zero, one); } -void EmitFPSaturate32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate32(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; + const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; + return ctx.OpFClamp(ctx.F32[1], value, zero, one); } -void EmitFPSaturate64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate64(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; + const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; + return ctx.OpFClamp(ctx.F64[1], value, zero, one); } Id EmitFPRoundEven16(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 22117a4ee..4c0b5990d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -7,10 +7,39 @@ namespace Shader::Backend::SPIRV { Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - if (inst->HasAssociatedPseudoOperation()) { - throw NotImplementedException("Pseudo-operations on IAdd32"); + Id result{}; + if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])}; + const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)}; + result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U); + + const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)}; + carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value)); + carry->Invalidate(); + } else { + result = ctx.OpIAdd(ctx.U32[1], a, b); } - return ctx.OpIAdd(ctx.U32[1], a, b); + if (IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}) { + zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); + zero->Invalidate(); + } + if (IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}) { + sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value)); + sign->Invalidate(); + } + if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast(std::numeric_limits::max())}; + const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; + const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Constant(ctx.U32[1], s32_max), a)}; + + const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)}; + const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)}; + const Id carry_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)}; + overflow->SetDefinition(carry_flag); + overflow->Invalidate(); + } + return result; } void EmitIAdd64(EmitContext&) { @@ -49,16 +78,16 @@ void EmitShiftRightArithmetic32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitBitwiseAnd32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b) { + return ctx.OpBitwiseAnd(ctx.U32[1], a, b); } -void EmitBitwiseOr32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b) { + return ctx.OpBitwiseOr(ctx.U32[1], a, b); } -void EmitBitwiseXor32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b) { + return ctx.OpBitwiseXor(ctx.U32[1], a, b); } void EmitBitFieldInsert(EmitContext&) { @@ -77,36 +106,36 @@ Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } -void EmitULessThan(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThan(ctx.U1, lhs, rhs); } -void EmitIEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpIEqual(ctx.U1, lhs, rhs); } -void EmitSLessThanEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs); } -void EmitULessThanEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThanEqual(ctx.U1, lhs, rhs); } Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); } -void EmitUGreaterThan(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThan(ctx.U1, lhs, rhs); } -void EmitINotEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpINotEqual(ctx.U1, lhs, rhs); } -void EmitSGreaterThanEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs); } Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 8d5062724..eb1926a4d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -14,8 +14,8 @@ void EmitSelect16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSelect32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); } void EmitSelect64(EmitContext&) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 8f120a2f6..34c2f67fb 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -468,11 +468,11 @@ F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPAbs16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPAbs32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPAbs64, value); default: ThrowInvalidType(value.Type()); @@ -481,11 +481,11 @@ F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPNeg16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPNeg32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPNeg64, value); default: ThrowInvalidType(value.Type()); @@ -495,10 +495,10 @@ F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { F16F32F64 result{value}; if (abs) { - result = FPAbs(value); + result = FPAbs(result); } if (neg) { - result = FPNeg(value); + result = FPNeg(result); } return result; } diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index c6f2f82bf..4e7f32423 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -19,8 +19,8 @@ enum class Pred : u64 { PT, }; -constexpr size_t NUM_USER_PREDS = 6; -constexpr size_t NUM_PREDS = 7; +constexpr size_t NUM_USER_PREDS = 7; +constexpr size_t NUM_PREDS = 8; [[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept { return static_cast(pred); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 16cdc12e2..ed5dbf41f 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,12 +56,12 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool src_a; } const fadd{insn}; - if (sat) { - throw NotImplementedException("FADD SAT"); - } if (cc) { throw NotImplementedException("FADD CC"); } @@ -31,7 +27,11 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, }; - v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); + IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fadd.dest_reg, value); } void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { @@ -53,15 +53,15 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetReg20F(insn)); + FADD(*this, insn, GetRegFloat20(insn)); } -void TranslatorVisitor::FADD_cbuf(u64) { - throw NotImplementedException("FADD (cbuf)"); +void TranslatorVisitor::FADD_cbuf(u64 insn) { + FADD(*this, insn, GetFloatCbuf(insn)); } -void TranslatorVisitor::FADD_imm(u64) { - throw NotImplementedException("FADD (imm)"); +void TranslatorVisitor::FADD_imm(u64 insn) { + FADD(*this, insn, GetFloatImm20(insn)); } void TranslatorVisitor::FADD32I(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 4d82a0009..81175627f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -158,7 +158,7 @@ void TranslatorVisitor::F2I_cbuf(u64 insn) { case SrcFormat::F16: return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; case SrcFormat::F32: - return GetCbufF(insn); + return GetFloatCbuf(insn); case SrcFormat::F64: { return UnpackCbuf(*this, insn); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 1464f2807..758700d3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn)); + FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn)); + FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 1b1d38be7..5c38d3fc1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -91,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetReg20F(insn)); + return FMUL(*this, insn, GetRegFloat20(insn)); } void TranslatorVisitor::FMUL_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 079e3497f..be17bb0d9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } -IR::F32 TranslatorVisitor::GetReg20F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) { return ir.BitCast(GetReg20(insn)); } -IR::F32 TranslatorVisitor::GetReg39F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) { return ir.BitCast(GetReg39(insn)); } @@ -73,7 +73,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } -IR::F32 TranslatorVisitor::GetCbufF(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { return ir.BitCast(GetCbuf(insn)); } @@ -88,6 +88,17 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) { return ir.Imm32(value); } +IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const f32 positive_value{Common::BitCast(static_cast(imm.value) << 12)}; + const f32 value{imm.is_negative != 0 ? -positive_value : positive_value}; + return ir.Imm32(value); +} + IR::U32 TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 27aba2cf8..4d4cf2ebf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -304,13 +304,14 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); - [[nodiscard]] IR::F32 GetReg20F(u64 insn); - [[nodiscard]] IR::F32 GetReg39F(u64 insn); + [[nodiscard]] IR::F32 GetRegFloat20(u64 insn); + [[nodiscard]] IR::F32 GetRegFloat39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); - [[nodiscard]] IR::F32 GetCbufF(u64 insn); + [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 623e78ff8..1493e1815 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -84,8 +84,8 @@ void TranslatorVisitor::IADD_cbuf(u64 insn) { IADD(*this, insn, GetCbuf(insn)); } -void TranslatorVisitor::IADD_imm(u64) { - throw NotImplementedException("IADD (imm)"); +void TranslatorVisitor::IADD_imm(u64 insn) { + IADD(*this, insn, GetImm20(insn)); } void TranslatorVisitor::IADD32I(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; +} // Anonymous namespace + +void TranslatorVisitor::P2R_reg(u64) { + throw NotImplementedException("P2R (reg)"); +} + +void TranslatorVisitor::P2R_cbuf(u64) { + throw NotImplementedException("P2R (cbuf)"); +} + +void TranslatorVisitor::P2R_imm(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const p2r{insn}; + + const u32 mask{GetImm20(insn).U32()}; + const bool pr_mode{p2r.mode == Mode::PR}; + const u32 num_items{pr_mode ? 7U : 4U}; + const u32 offset{static_cast(p2r.byte_selector) * 8}; + IR::U32 insert{ir.Imm32(0)}; + for (u32 index = 0; index < num_items; ++index) { + if (((mask >> index) & 1) == 0) { + continue; + } + const IR::U1 cond{[this, index, pr_mode] { + if (pr_mode) { + return ir.GetPred(IR::Pred{index}); + } + switch (index) { + case 0: + return ir.GetZFlag(); + case 1: + return ir.GetSFlag(); + case 2: + return ir.GetCFlag(); + case 3: + return ir.GetOFlag(); + } + throw LogicError("Unreachable P2R index"); + }()}; + const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; + insert = ir.BitwiseOr(insert, bit); + } + const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; + X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6b2a1356b..628cf1c14 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -633,18 +633,6 @@ void TranslatorVisitor::OUT_imm(u64) { ThrowNotImplemented(Opcode::OUT_imm); } -void TranslatorVisitor::P2R_reg(u64) { - ThrowNotImplemented(Opcode::P2R_reg); -} - -void TranslatorVisitor::P2R_cbuf(u64) { - ThrowNotImplemented(Opcode::P2R_cbuf); -} - -void TranslatorVisitor::P2R_imm(u64) { - ThrowNotImplemented(Opcode::P2R_imm); -} - void TranslatorVisitor::PBK() { // PBK is a no-op } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 49ff911d6..b25af6cd3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -191,12 +191,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; - + /* FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); fclose(file); std::system("spirv-dis D:\\shader.spv"); - + */ shader_info->unique_hash = env.ComputeHash(); shader_info->size_bytes = env.ShaderSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, -- cgit v1.2.3 From 274897dfd59b4d08029ab7e93be4f84654abcdc8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 23:42:38 -0300 Subject: spirv: Fixes and Intel specific workarounds --- src/shader_recompiler/backend/spirv/emit_context.cpp | 3 ++- src/shader_recompiler/backend/spirv/emit_context.h | 5 ++++- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 6 +++--- src/shader_recompiler/backend/spirv/emit_spirv.h | 8 ++++---- .../backend/spirv/emit_spirv_floating_point.cpp | 13 ++++++++++--- .../backend/spirv/emit_spirv_logical.cpp | 16 ++++++++-------- .../frontend/ir/structured_control_flow.cpp | 3 --- src/shader_recompiler/frontend/maxwell/program.cpp | 3 --- .../frontend/maxwell/translate/impl/impl.cpp | 15 +++++++++------ src/shader_recompiler/profile.h | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 11 files changed, 44 insertions(+), 32 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index ea1c8a3be..d2dbd56d4 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -25,7 +25,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(IR::Program& program) : Sirit::Module(0x00010000) { +EmitContext::EmitContext(const Profile& profile_, IR::Program& program) + : Sirit::Module(0x00010000), profile{profile_} { AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 8de203da2..d20cf387e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -11,6 +11,7 @@ #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/shader_info.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::SPIRV { @@ -30,11 +31,13 @@ private: class EmitContext final : public Sirit::Module { public: - explicit EmitContext(IR::Program& program); + explicit EmitContext(const Profile& profile, IR::Program& program); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); + const Profile& profile; + Id void_id{}; Id U1{}; Id U16{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 2519e446a..f3aca90d0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -150,11 +150,11 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } else if (info.uses_fp16_denorms_flush) { if (profile.support_fp16_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); - ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U); } else { // Same as fp32, no need to warn as most drivers will flush by default } - } else if (info.uses_fp32_denorms_preserve) { + } else if (info.uses_fp16_denorms_preserve) { if (profile.support_fp16_denorm_preserve) { ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); @@ -166,7 +166,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } // Anonymous namespace std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { - EmitContext ctx{program}; + EmitContext ctx{profile, program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) Id func{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 922e294a7..cec80c13e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -202,10 +202,10 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -void EmitLogicalOr(EmitContext& ctx); -void EmitLogicalAnd(EmitContext& ctx); -void EmitLogicalXor(EmitContext& ctx); -void EmitLogicalNot(EmitContext& ctx); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); +Id EmitLogicalNot(EmitContext& ctx, Id value); Id EmitConvertS16F16(EmitContext& ctx, Id value); Id EmitConvertS16F32(EmitContext& ctx, Id value); Id EmitConvertS16F64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 47f87054b..5d0b74f9b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -15,6 +15,13 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { return op; } +Id Saturate(EmitContext& ctx, Id type, Id value, Id zero, Id one) { + if (ctx.profile.has_broken_spirv_clamp) { + return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one); + } else { + return ctx.OpFClamp(type, value, zero, one); + } +} } // Anonymous namespace Id EmitFPAbs16(EmitContext& ctx, Id value) { @@ -144,19 +151,19 @@ void EmitFPLog2(EmitContext&) { Id EmitFPSaturate16(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; - return ctx.OpFClamp(ctx.F32[1], value, zero, one); + return Saturate(ctx, ctx.F16[1], value, zero, one); } Id EmitFPSaturate32(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; - return ctx.OpFClamp(ctx.F32[1], value, zero, one); + return Saturate(ctx, ctx.F32[1], value, zero, one); } Id EmitFPSaturate64(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; - return ctx.OpFClamp(ctx.F64[1], value, zero, one); + return Saturate(ctx, ctx.F64[1], value, zero, one); } Id EmitFPRoundEven16(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index c5a07252f..bb434def2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -6,20 +6,20 @@ namespace Shader::Backend::SPIRV { -void EmitLogicalOr(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalOr(ctx.U1, a, b); } -void EmitLogicalAnd(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalAnd(ctx.U1, a, b); } -void EmitLogicalXor(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalNotEqual(ctx.U1, a, b); } -void EmitLogicalNot(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalNot(EmitContext& ctx, Id value) { + return ctx.OpLogicalNot(ctx.U1, value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp index d145095d1..032ac8fda 100644 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -272,11 +272,9 @@ public: explicit GotoPass(std::span blocks, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; - fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children)); for (const Node& goto_stmt : gotos | std::views::reverse) { RemoveGoto(goto_stmt); } - fmt::print(stdout, "AFTER\n{}\n", DumpTree(root_stmt.children)); } Statement& RootStatement() noexcept { @@ -548,7 +546,6 @@ private: size_t Offset(ConstNode stmt) const { size_t offset{0}; if (!SearchNode(root_stmt.children, stmt, offset)) { - fmt::print(stdout, "{}\n", DumpTree(root_stmt.children)); throw LogicError("Node not found in tree"); } return offset; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ed5dbf41f..dbfc04f75 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,7 +56,6 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const s32 positive_value{static_cast(imm.value)}; - const s32 value{imm.is_negative != 0 ? -positive_value : positive_value}; - return ir.Imm32(value); + if (imm.is_negative != 0) { + const s64 raw{static_cast(imm.value)}; + return ir.Imm32(static_cast(-(1LL << 19) + raw)); + } else { + return ir.Imm32(static_cast(imm.value)); + } } IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { @@ -94,9 +97,9 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const f32 positive_value{Common::BitCast(static_cast(imm.value) << 12)}; - const f32 value{imm.is_negative != 0 ? -positive_value : positive_value}; - return ir.Imm32(value); + const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0}; + const u32 value{static_cast(imm.value) << 12}; + return ir.Imm32(Common::BitCast(value | sign_bit)); } IR::U32 TranslatorVisitor::GetImm32(u64 insn) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 9881bebab..917fc1251 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -15,6 +15,9 @@ struct Profile { bool support_fp32_denorm_preserve{}; bool support_fp16_denorm_flush{}; bool support_fp32_denorm_flush{}; + + // FClamp is broken and OpFMax + OpFMin should be used instead + bool has_broken_spirv_clamp{}; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b25af6cd3..2497c2385 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -189,6 +189,7 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .has_broken_spirv_clamp = true, // TODO: is_intel }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; /* -- cgit v1.2.3 From ab463712474de5f99eec137a9c6233e55fe184f0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Mar 2021 18:31:53 -0300 Subject: shader: Initial support for textures and TEX --- src/shader_recompiler/CMakeLists.txt | 3 + .../backend/spirv/emit_context.cpp | 69 ++- src/shader_recompiler/backend/spirv/emit_context.h | 7 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 12 + src/shader_recompiler/backend/spirv/emit_spirv.h | 32 +- .../backend/spirv/emit_spirv_convert.cpp | 48 ++ .../backend/spirv/emit_spirv_image.cpp | 146 ++++++ .../backend/spirv/emit_spirv_memory.cpp | 18 +- src/shader_recompiler/environment.h | 2 + src/shader_recompiler/file_environment.cpp | 4 + src/shader_recompiler/file_environment.h | 4 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 133 ++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 21 +- .../frontend/ir/microinstruction.cpp | 73 ++- .../frontend/ir/microinstruction.h | 22 +- src/shader_recompiler/frontend/ir/modifiers.h | 10 + src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/ir/opcodes.inc | 569 +++++++++++---------- src/shader_recompiler/frontend/ir/reg.h | 11 + src/shader_recompiler/frontend/ir/value.h | 1 + src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_sample.cpp | 232 +++++++++ .../ir_opt/collect_shader_info_pass.cpp | 19 + .../global_memory_to_storage_buffer_pass.cpp | 15 +- src/shader_recompiler/ir_opt/passes.h | 2 + src/shader_recompiler/ir_opt/texture_pass.cpp | 199 +++++++ src/shader_recompiler/shader_info.h | 52 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 101 ++++ .../renderer_vulkan/vk_compute_pipeline.h | 4 + .../renderer_vulkan/vk_pipeline_cache.cpp | 4 + src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3 +- 33 files changed, 1489 insertions(+), 342 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_image.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp create mode 100644 src/shader_recompiler/ir_opt/texture_pass.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index cc38b28ed..fa268d38f 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -9,6 +9,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_control_flow.cpp backend/spirv/emit_spirv_convert.cpp backend/spirv/emit_spirv_floating_point.cpp + backend/spirv/emit_spirv_image.cpp backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp @@ -100,6 +101,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp + frontend/maxwell/translate/impl/texture_sample.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp @@ -110,6 +112,7 @@ add_library(shader_recompiler STATIC ir_opt/lower_fp16_to_fp32.cpp ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp + ir_opt/texture_pass.cpp ir_opt/verification_pass.cpp object_pool.h profile.h diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index d2dbd56d4..21900d387 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -12,6 +12,43 @@ #include "shader_recompiler/backend/spirv/emit_context.h" namespace Shader::Backend::SPIRV { +namespace { +Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + const Id type{ctx.F32[1]}; + switch (desc.type) { + case TextureType::Color1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 1, format); + case TextureType::ColorArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 1, format); + case TextureType::Color2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 1, format); + case TextureType::ColorArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 1, format); + case TextureType::Color3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 1, format); + case TextureType::ColorCube: + return ctx.TypeImage(type, spv::Dim::Cube, false, false, false, 1, format); + case TextureType::ColorArrayCube: + return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format); + case TextureType::Shadow1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, true, false, false, 1, format); + case TextureType::ShadowArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, true, true, false, 1, format); + case TextureType::Shadow2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, true, false, false, 1, format); + case TextureType::ShadowArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, true, true, false, 1, format); + case TextureType::Shadow3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, true, false, false, 1, format); + case TextureType::ShadowCube: + return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format); + case TextureType::ShadowArrayCube: + return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format); + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} +} // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { defs[0] = sirit_ctx.Name(base_type, name); @@ -35,6 +72,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program) u32 binding{}; DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); + DefineTextures(program.info, binding); DefineLabels(program); } @@ -46,6 +84,10 @@ Id EmitContext::Def(const IR::Value& value) { return value.Inst()->Definition(); } switch (value.Type()) { + case IR::Type::Void: + // Void instructions are used for optional arguments (e.g. texture offsets) + // They are not meant to be used in the SPIR-V module + return Id{}; case IR::Type::U1: return value.U1() ? true_value : false_value; case IR::Type::U32: @@ -122,7 +164,7 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); u32 index{}; - for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); @@ -152,7 +194,7 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); u32 index{}; - for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); @@ -163,6 +205,29 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { } } +void EmitContext::DefineTextures(const Info& info, u32& binding) { + textures.reserve(info.texture_descriptors.size()); + for (const TextureDescriptor& desc : info.texture_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of textures"); + } + const Id type{TypeSampledImage(ImageType(*this, desc))}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("tex{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + for (u32 index = 0; index < desc.count; ++index) { + // TODO: Pass count info + textures.push_back(TextureDefinition{ + .id{id}, + .type{type}, + }); + } + binding += desc.count; + } +} + void EmitContext::DefineLabels(IR::Program& program) { for (const IR::Function& function : program.functions) { for (IR::Block* const block : function.blocks) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index d20cf387e..8b3109eb8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -29,6 +29,11 @@ private: std::array defs{}; }; +struct TextureDefinition { + Id id; + Id type; +}; + class EmitContext final : public Sirit::Module { public: explicit EmitContext(const Profile& profile, IR::Program& program); @@ -56,6 +61,7 @@ public: std::array cbufs{}; std::array ssbos{}; + std::vector textures; Id workgroup_id{}; Id local_invocation_id{}; @@ -66,6 +72,7 @@ private: void DefineSpecialVariables(const Info& info); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); + void DefineTextures(const Info& info, u32& binding); void DefineLabels(IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 8097fe82d..a94e9cb2d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -221,6 +221,14 @@ std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program workgroup_size[2]); SetupDenormControl(profile, program, ctx, func); + if (info.uses_sampled_1d) { + ctx.AddCapability(spv::Capability::Sampled1D); + } + if (info.uses_sparse_residency) { + ctx.AddCapability(spv::Capability::SparseResidency); + } + // TODO: Track this usage + ctx.AddCapability(spv::Capability::ImageGatherExtended); return ctx.Assemble(); } @@ -259,4 +267,8 @@ void EmitGetOverflowFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } +void EmitGetSparseFromOp(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 92387ca28..69698c478 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -83,7 +83,8 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Va Id value); void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); -void EmitWriteStorage128(EmitContext& ctx); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); @@ -145,6 +146,7 @@ void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); Id EmitFPAbs16(EmitContext& ctx, Id value); Id EmitFPAbs32(EmitContext& ctx, Id value); Id EmitFPAbs64(EmitContext& ctx, Id value); @@ -291,5 +293,33 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value); Id EmitConvertF32F16(EmitContext& ctx, Id value); Id EmitConvertF32F64(EmitContext& ctx, Id value); Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S32(EmitContext& ctx, Id value); +Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U32(EmitContext& ctx, Id value); +Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S32(EmitContext& ctx, Id value); +Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U32(EmitContext& ctx, Id value); +Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S32(EmitContext& ctx, Id value); +Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U32(EmitContext& ctx, Id value); +Id EmitConvertF64U64(EmitContext& ctx, Id value); +Id EmitBindlessImageSampleImplicitLod(EmitContext&); +Id EmitBindlessImageSampleExplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +Id EmitBoundImageSampleImplicitLod(EmitContext&); +Id EmitBoundImageSampleExplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, Id offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod_lc, Id offset); +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, Id offset); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod_lc, Id offset); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index edcc2a1cc..2aff673aa 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -102,4 +102,52 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) { return ctx.OpFConvert(ctx.F64[1], value); } +Id EmitConvertF16S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF16U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF32S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], value); +} + +Id EmitConvertF32U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF32U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], value); +} + +Id EmitConvertF64S32(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64S64(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], value); +} + +Id EmitConvertF64U32(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + +Id EmitConvertF64U64(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp new file mode 100644 index 000000000..5f4783c95 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -0,0 +1,146 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +class ImageOperands { +public: + explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, + Id lod, Id offset) { + if (has_bias) { + const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Bias, bias); + } + if (has_lod) { + const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; + Add(spv::ImageOperandsMask::Lod, lod_value); + } + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (has_lod_clamp) { + const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod}; + Add(spv::ImageOperandsMask::MinLod, lod_clamp); + } + } + + void Add(spv::ImageOperandsMask new_mask, Id value) { + mask = static_cast(static_cast(mask) | + static_cast(new_mask)); + operands.push_back(value); + } + + std::span Span() const noexcept { + return std::span{operands.data(), operands.size()}; + } + + spv::ImageOperandsMask Mask() const noexcept { + return mask; + } + +private: + boost::container::static_vector operands; + spv::ImageOperandsMask mask{}; +}; + +Id Texture(EmitContext& ctx, const IR::Value& index) { + if (index.IsImmediate()) { + const TextureDefinition def{ctx.textures.at(index.U32())}; + return ctx.OpLoad(def.type, def.id); + } + throw NotImplementedException("Indirect texture sample"); +} + +template +Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst, + Id result_type, Args&&... args) { + IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (!sparse) { + return (ctx.*non_sparse_ptr)(result_type, std::forward(args)...); + } + const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)}; + const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward(args)...)}; + const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)}; + sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code)); + sparse->Invalidate(); + return ctx.OpCompositeExtract(result_type, sample, 1U); +} +} // Anonymous namespace + +Id EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, Id offset) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, + offset); + return Emit(&EmitContext::OpImageSparseSampleImplicitLod, + &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), + coords, operands.Mask(), operands.Span()); +} + +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod_lc, Id offset) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); + return Emit(&EmitContext::OpImageSparseSampleExplicitLod, + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), + coords, operands.Mask(), operands.Span()); +} + +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, Id offset) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, + offset); + return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, + &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); +} + +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod_lc, Id offset) { + const auto info{inst->Flags()}; + const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); + return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, + &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], + Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 808c1b401..7d3efc741 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -154,8 +154,22 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); } -void EmitWriteStorage128(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + // TODO: Support reinterpreting bindings, guaranteed to be aligned + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + for (u32 element = 0; element < 4; ++element) { + Id index = base_index; + if (element > 0) { + index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); + } + const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; + ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, element)); + } } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0ba681fb9..0fcb68050 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -12,6 +12,8 @@ public: [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; + [[nodiscard]] virtual std::array WorkgroupSize() = 0; }; diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index 5127523f9..21700c72b 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp @@ -39,6 +39,10 @@ u64 FileEnvironment::ReadInstruction(u32 offset) { return data[offset / 8]; } +u32 FileEnvironment::TextureBoundBuffer() { + throw NotImplementedException("Texture bound buffer serialization"); +} + std::array FileEnvironment::WorkgroupSize() { return {1, 1, 1}; } diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index b8c4bbadd..62302bc8e 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -3,7 +3,7 @@ #include #include "common/common_types.h" -#include "environment.h" +#include "shader_recompiler/environment.h" namespace Shader { @@ -14,6 +14,8 @@ public: u64 ReadInstruction(u32 offset) override; + u32 TextureBoundBuffer() override; + std::array WorkgroupSize() override; private: diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f38b46bac..ae3354c66 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -7,11 +7,24 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { - -[[noreturn]] static void ThrowInvalidType(Type type) { +namespace { +[[noreturn]] void ThrowInvalidType(Type type) { throw InvalidArgument("Invalid type {}", type); } +Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { + if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { + return ir.CompositeConstruct(bias_lod, lod_clamp); + } else if (!bias_lod.IsEmpty()) { + return bias_lod; + } else if (!lod_clamp.IsEmpty()) { + return lod_clamp; + } else { + return Value{}; + } +} +} // Anonymous namespace + U1 IREmitter::Imm1(bool value) const { return U1{Value{value}}; } @@ -261,6 +274,10 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { return Inst(Opcode::GetOverflowFromOp, op); } +U1 IREmitter::GetSparseFromOp(const Value& op) { + return Inst(Opcode::GetSparseFromOp, op); +} + F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { if (a.Type() != a.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); @@ -1035,6 +1052,82 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v } } +F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF16S32, value); + case Type::U64: + return Inst(Opcode::ConvertF16S64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF32S32, value); + case Type::U64: + return Inst(Opcode::ConvertF32S64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF64S32, value); + case Type::U64: + return Inst(Opcode::ConvertF64S64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) { + switch (bitsize) { + case 16: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF16U32, value); + case Type::U64: + return Inst(Opcode::ConvertF16U64, value); + default: + ThrowInvalidType(value.Type()); + } + case 32: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF32U32, value); + case Type::U64: + return Inst(Opcode::ConvertF32U64, value); + default: + ThrowInvalidType(value.Type()); + } + case 64: + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::ConvertF64U32, value); + case Type::U64: + return Inst(Opcode::ConvertF64U64, value); + default: + ThrowInvalidType(value.Type()); + } + default: + throw InvalidArgument("Invalid destination bitsize {}", bitsize); + } +} + +F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) { + if (is_signed) { + return ConvertSToF(bitsize, value); + } else { + return ConvertUToF(bitsize, value); + } +} + U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { switch (result_bitsize) { case 32: @@ -1107,4 +1200,40 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); } +Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod + : Opcode::BindlessImageSampleImplicitLod}; + return Inst(op, Flags{info}, handle, coords, bias_lc, offset); +} + +Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod + : Opcode::BindlessImageSampleExplicitLod}; + return Inst(op, Flags{info}, handle, coords, lod_lc, offset); +} + +F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info) { + const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod + : Opcode::BindlessImageSampleDrefImplicitLod}; + return Inst(op, Flags{info}, handle, coords, dref, bias_lc, offset); +} + +F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, + const F32& lod, const Value& offset, const F32& lod_clamp, + TextureInstInfo info) { + const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)}; + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod + : Opcode::BindlessImageSampleDrefExplicitLod}; + return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 6e29bf0e2..cb2a7710a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -91,6 +91,7 @@ public: [[nodiscard]] U1 GetSignFromOp(const Value& op); [[nodiscard]] U1 GetCarryFromOp(const Value& op); [[nodiscard]] U1 GetOverflowFromOp(const Value& op); + [[nodiscard]] U1 GetSparseFromOp(const Value& op); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); @@ -159,7 +160,7 @@ public: [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, const U32& count); [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, - bool is_signed); + bool is_signed = false); [[nodiscard]] U32 BitReverse(const U32& value); [[nodiscard]] U32 BitCount(const U32& value); [[nodiscard]] U32 BitwiseNot(const U32& value); @@ -186,10 +187,28 @@ public: [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); + [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, + const F32& bias, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, + const F32& lod, const Value& offset, + const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& bias, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info); + [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, + const F32& dref, const F32& lod, + const Value& offset, const F32& lod_clamp, + TextureInstInfo info); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index d6a9be87d..88e186f21 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -10,26 +10,27 @@ #include "shader_recompiler/frontend/ir/type.h" namespace Shader::IR { - -static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { +namespace { +void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { if (inst && inst->Opcode() != opcode) { throw LogicError("Invalid pseudo-instruction"); } } -static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { +void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { if (dest_inst) { throw LogicError("Only one of each type of pseudo-op allowed"); } dest_inst = pseudo_inst; } -static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { +void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { if (inst->Opcode() != expected_opcode) { throw LogicError("Undoing use of invalid pseudo-op"); } inst = nullptr; } +} // Anonymous namespace Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { if (op == Opcode::Phi) { @@ -82,6 +83,7 @@ bool Inst::IsPseudoInstruction() const noexcept { case Opcode::GetSignFromOp: case Opcode::GetCarryFromOp: case Opcode::GetOverflowFromOp: + case Opcode::GetSparseFromOp: return true; default: return false; @@ -96,25 +98,26 @@ bool Inst::AreAllArgsImmediates() const { [](const IR::Value& value) { return value.IsImmediate(); }); } -bool Inst::HasAssociatedPseudoOperation() const noexcept { - return zero_inst || sign_inst || carry_inst || overflow_inst; -} - Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { - // This is faster than doing a search through the block. + if (!associated_insts) { + return nullptr; + } switch (opcode) { case Opcode::GetZeroFromOp: - CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp); - return zero_inst; + CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp); + return associated_insts->zero_inst; case Opcode::GetSignFromOp: - CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp); - return sign_inst; + CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp); + return associated_insts->sign_inst; case Opcode::GetCarryFromOp: - CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp); - return carry_inst; + CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp); + return associated_insts->carry_inst; case Opcode::GetOverflowFromOp: - CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp); - return overflow_inst; + CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp); + return associated_insts->overflow_inst; + case Opcode::GetSparseFromOp: + CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); + return associated_insts->sparse_inst; default: throw InvalidArgument("{} is not a pseudo-instruction", opcode); } @@ -220,22 +223,37 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } +void AllocAssociatedInsts(std::unique_ptr& associated_insts) { + if (!associated_insts) { + associated_insts = std::make_unique(); + } +} + void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; + std::unique_ptr& assoc_inst{inst->associated_insts}; switch (op) { case Opcode::GetZeroFromOp: - SetPseudoInstruction(inst->zero_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->zero_inst, this); break; case Opcode::GetSignFromOp: - SetPseudoInstruction(inst->sign_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->sign_inst, this); break; case Opcode::GetCarryFromOp: - SetPseudoInstruction(inst->carry_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->carry_inst, this); break; case Opcode::GetOverflowFromOp: - SetPseudoInstruction(inst->overflow_inst, this); + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->overflow_inst, this); + break; + case Opcode::GetSparseFromOp: + AllocAssociatedInsts(assoc_inst); + SetPseudoInstruction(assoc_inst->sparse_inst, this); break; default: break; @@ -246,18 +264,23 @@ void Inst::UndoUse(const Value& value) { Inst* const inst{value.Inst()}; --inst->use_count; + std::unique_ptr& assoc_inst{inst->associated_insts}; switch (op) { case Opcode::GetZeroFromOp: - RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp); break; case Opcode::GetSignFromOp: - RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp); break; case Opcode::GetCarryFromOp: - RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp); break; case Opcode::GetOverflowFromOp: - RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp); + AllocAssociatedInsts(assoc_inst); + RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); break; default: break; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 321393dd7..d5336c438 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -22,7 +22,7 @@ namespace Shader::IR { class Block; -constexpr size_t MAX_ARG_COUNT = 4; +struct AssociatedInsts; class Inst : public boost::intrusive::list_base_hook<> { public: @@ -50,6 +50,11 @@ public: return op; } + /// Determines if there is a pseudo-operation associated with this instruction. + [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { + return associated_insts != nullptr; + } + /// Determines whether or not this instruction may have side effects. [[nodiscard]] bool MayHaveSideEffects() const noexcept; @@ -60,8 +65,6 @@ public: /// Determines if all arguments of this instruction are immediates. [[nodiscard]] bool AreAllArgsImmediates() const; - /// Determines if there is a pseudo-operation associated with this instruction. - [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept; /// Gets a pseudo-operation associated with this instruction [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); @@ -122,14 +125,21 @@ private: u32 definition{}; union { NonTriviallyDummy dummy{}; - std::array args; std::vector> phi_args; + std::array args; + }; + std::unique_ptr associated_insts; +}; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); + +struct AssociatedInsts { + union { + Inst* sparse_inst; + Inst* zero_inst{}; }; - Inst* zero_inst{}; Inst* sign_inst{}; Inst* carry_inst{}; Inst* overflow_inst{}; }; -static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size"); } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 44652eae7..ad07700ae 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -4,7 +4,9 @@ #pragma once +#include "common/bit_field.h" #include "common/common_types.h" +#include "shader_recompiler/shader_info.h" namespace Shader::IR { @@ -30,4 +32,12 @@ struct FpControl { }; static_assert(sizeof(FpControl) <= sizeof(u32)); +union TextureInstInfo { + u32 raw; + BitField<0, 8, TextureType> type; + BitField<8, 1, u32> has_bias; + BitField<16, 1, u32> has_lod_clamp; +}; +static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 1f188411a..8492a13d5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -14,7 +14,7 @@ namespace { struct OpcodeMeta { std::string_view name; Type type; - std::array arg_types; + std::array arg_types; }; using enum Type; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index c4e72c84d..aa011fab1 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -2,301 +2,330 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ... -OPCODE(Phi, Opaque, ) -OPCODE(Identity, Opaque, Opaque, ) -OPCODE(Void, Void, ) +// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg4 type, ... +OPCODE(Phi, Opaque, ) +OPCODE(Identity, Opaque, Opaque, ) +OPCODE(Void, Void, ) // Control flow -OPCODE(Branch, Void, Label, ) -OPCODE(BranchConditional, Void, U1, Label, Label, ) -OPCODE(LoopMerge, Void, Label, Label, ) -OPCODE(SelectionMerge, Void, Label, ) -OPCODE(Return, Void, ) +OPCODE(Branch, Void, Label, ) +OPCODE(BranchConditional, Void, U1, Label, Label, ) +OPCODE(LoopMerge, Void, Label, Label, ) +OPCODE(SelectionMerge, Void, Label, ) +OPCODE(Return, Void, ) // Context getters/setters -OPCODE(GetRegister, U32, Reg, ) -OPCODE(SetRegister, Void, Reg, U32, ) -OPCODE(GetPred, U1, Pred, ) -OPCODE(SetPred, Void, Pred, U1, ) -OPCODE(GetGotoVariable, U1, U32, ) -OPCODE(SetGotoVariable, Void, U32, U1, ) -OPCODE(GetCbuf, U32, U32, U32, ) -OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, U32, ) -OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, U32, ) -OPCODE(GetZFlag, U1, Void, ) -OPCODE(GetSFlag, U1, Void, ) -OPCODE(GetCFlag, U1, Void, ) -OPCODE(GetOFlag, U1, Void, ) -OPCODE(SetZFlag, Void, U1, ) -OPCODE(SetSFlag, Void, U1, ) -OPCODE(SetCFlag, Void, U1, ) -OPCODE(SetOFlag, Void, U1, ) -OPCODE(WorkgroupId, U32x3, ) -OPCODE(LocalInvocationId, U32x3, ) +OPCODE(GetRegister, U32, Reg, ) +OPCODE(SetRegister, Void, Reg, U32, ) +OPCODE(GetPred, U1, Pred, ) +OPCODE(SetPred, Void, Pred, U1, ) +OPCODE(GetGotoVariable, U1, U32, ) +OPCODE(SetGotoVariable, Void, U32, U1, ) +OPCODE(GetCbuf, U32, U32, U32, ) +OPCODE(GetAttribute, U32, Attribute, ) +OPCODE(SetAttribute, Void, Attribute, U32, ) +OPCODE(GetAttributeIndexed, U32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, U32, ) +OPCODE(GetZFlag, U1, Void, ) +OPCODE(GetSFlag, U1, Void, ) +OPCODE(GetCFlag, U1, Void, ) +OPCODE(GetOFlag, U1, Void, ) +OPCODE(SetZFlag, Void, U1, ) +OPCODE(SetSFlag, Void, U1, ) +OPCODE(SetCFlag, Void, U1, ) +OPCODE(SetOFlag, Void, U1, ) +OPCODE(WorkgroupId, U32x3, ) +OPCODE(LocalInvocationId, U32x3, ) // Undefined -OPCODE(UndefU1, U1, ) -OPCODE(UndefU8, U8, ) -OPCODE(UndefU16, U16, ) -OPCODE(UndefU32, U32, ) -OPCODE(UndefU64, U64, ) +OPCODE(UndefU1, U1, ) +OPCODE(UndefU8, U8, ) +OPCODE(UndefU16, U16, ) +OPCODE(UndefU32, U32, ) +OPCODE(UndefU64, U64, ) // Memory operations -OPCODE(LoadGlobalU8, U32, U64, ) -OPCODE(LoadGlobalS8, U32, U64, ) -OPCODE(LoadGlobalU16, U32, U64, ) -OPCODE(LoadGlobalS16, U32, U64, ) -OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, U32x2, U64, ) -OPCODE(LoadGlobal128, U32x4, U64, ) -OPCODE(WriteGlobalU8, Void, U64, U32, ) -OPCODE(WriteGlobalS8, Void, U64, U32, ) -OPCODE(WriteGlobalU16, Void, U64, U32, ) -OPCODE(WriteGlobalS16, Void, U64, U32, ) -OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, U32x2, ) -OPCODE(WriteGlobal128, Void, U64, U32x4, ) +OPCODE(LoadGlobalU8, U32, U64, ) +OPCODE(LoadGlobalS8, U32, U64, ) +OPCODE(LoadGlobalU16, U32, U64, ) +OPCODE(LoadGlobalS16, U32, U64, ) +OPCODE(LoadGlobal32, U32, U64, ) +OPCODE(LoadGlobal64, U32x2, U64, ) +OPCODE(LoadGlobal128, U32x4, U64, ) +OPCODE(WriteGlobalU8, Void, U64, U32, ) +OPCODE(WriteGlobalS8, Void, U64, U32, ) +OPCODE(WriteGlobalU16, Void, U64, U32, ) +OPCODE(WriteGlobalS16, Void, U64, U32, ) +OPCODE(WriteGlobal32, Void, U64, U32, ) +OPCODE(WriteGlobal64, Void, U64, U32x2, ) +OPCODE(WriteGlobal128, Void, U64, U32x4, ) // Storage buffer operations -OPCODE(LoadStorageU8, U32, U32, U32, ) -OPCODE(LoadStorageS8, U32, U32, U32, ) -OPCODE(LoadStorageU16, U32, U32, U32, ) -OPCODE(LoadStorageS16, U32, U32, U32, ) -OPCODE(LoadStorage32, U32, U32, U32, ) -OPCODE(LoadStorage64, U32x2, U32, U32, ) -OPCODE(LoadStorage128, U32x4, U32, U32, ) -OPCODE(WriteStorageU8, Void, U32, U32, U32, ) -OPCODE(WriteStorageS8, Void, U32, U32, U32, ) -OPCODE(WriteStorageU16, Void, U32, U32, U32, ) -OPCODE(WriteStorageS16, Void, U32, U32, U32, ) -OPCODE(WriteStorage32, Void, U32, U32, U32, ) -OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) -OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) +OPCODE(LoadStorageU8, U32, U32, U32, ) +OPCODE(LoadStorageS8, U32, U32, U32, ) +OPCODE(LoadStorageU16, U32, U32, U32, ) +OPCODE(LoadStorageS16, U32, U32, U32, ) +OPCODE(LoadStorage32, U32, U32, U32, ) +OPCODE(LoadStorage64, U32x2, U32, U32, ) +OPCODE(LoadStorage128, U32x4, U32, U32, ) +OPCODE(WriteStorageU8, Void, U32, U32, U32, ) +OPCODE(WriteStorageS8, Void, U32, U32, U32, ) +OPCODE(WriteStorageU16, Void, U32, U32, U32, ) +OPCODE(WriteStorageS16, Void, U32, U32, U32, ) +OPCODE(WriteStorage32, Void, U32, U32, U32, ) +OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) +OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) // Vector utility -OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) -OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) -OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) -OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) -OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) -OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) -OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) -OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) -OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) -OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) -OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) -OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) -OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) -OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) -OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) -OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) -OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) -OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) -OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) -OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) -OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) -OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) -OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) -OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) -OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) -OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) -OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) -OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) -OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) -OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) -OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) -OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) -OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) -OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) -OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) -OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) +OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) +OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) +OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) +OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) +OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) +OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) +OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) +OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) +OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) +OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) +OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) +OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) +OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) +OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) +OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) +OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) +OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) +OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) +OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) +OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) +OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) +OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) +OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) +OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) +OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) +OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) +OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) +OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) +OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) +OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) // Select operations -OPCODE(SelectU1, U1, U1, U1, U1, ) -OPCODE(SelectU8, U8, U1, U8, U8, ) -OPCODE(SelectU16, U16, U1, U16, U16, ) -OPCODE(SelectU32, U32, U1, U32, U32, ) -OPCODE(SelectU64, U64, U1, U64, U64, ) -OPCODE(SelectF16, F16, U1, F16, F16, ) -OPCODE(SelectF32, F32, U1, F32, F32, ) +OPCODE(SelectU1, U1, U1, U1, U1, ) +OPCODE(SelectU8, U8, U1, U8, U8, ) +OPCODE(SelectU16, U16, U1, U16, U16, ) +OPCODE(SelectU32, U32, U1, U32, U32, ) +OPCODE(SelectU64, U64, U1, U64, U64, ) +OPCODE(SelectF16, F16, U1, F16, F16, ) +OPCODE(SelectF32, F32, U1, F32, F32, ) // Bitwise conversions -OPCODE(BitCastU16F16, U16, F16, ) -OPCODE(BitCastU32F32, U32, F32, ) -OPCODE(BitCastU64F64, U64, F64, ) -OPCODE(BitCastF16U16, F16, U16, ) -OPCODE(BitCastF32U32, F32, U32, ) -OPCODE(BitCastF64U64, F64, U64, ) -OPCODE(PackUint2x32, U64, U32x2, ) -OPCODE(UnpackUint2x32, U32x2, U64, ) -OPCODE(PackFloat2x16, U32, F16x2, ) -OPCODE(UnpackFloat2x16, F16x2, U32, ) -OPCODE(PackHalf2x16, U32, F32x2, ) -OPCODE(UnpackHalf2x16, F32x2, U32, ) -OPCODE(PackDouble2x32, F64, U32x2, ) -OPCODE(UnpackDouble2x32, U32x2, F64, ) +OPCODE(BitCastU16F16, U16, F16, ) +OPCODE(BitCastU32F32, U32, F32, ) +OPCODE(BitCastU64F64, U64, F64, ) +OPCODE(BitCastF16U16, F16, U16, ) +OPCODE(BitCastF32U32, F32, U32, ) +OPCODE(BitCastF64U64, F64, U64, ) +OPCODE(PackUint2x32, U64, U32x2, ) +OPCODE(UnpackUint2x32, U32x2, U64, ) +OPCODE(PackFloat2x16, U32, F16x2, ) +OPCODE(UnpackFloat2x16, F16x2, U32, ) +OPCODE(PackHalf2x16, U32, F32x2, ) +OPCODE(UnpackHalf2x16, F32x2, U32, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) // Pseudo-operation, handled specially at final emit -OPCODE(GetZeroFromOp, U1, Opaque, ) -OPCODE(GetSignFromOp, U1, Opaque, ) -OPCODE(GetCarryFromOp, U1, Opaque, ) -OPCODE(GetOverflowFromOp, U1, Opaque, ) +OPCODE(GetZeroFromOp, U1, Opaque, ) +OPCODE(GetSignFromOp, U1, Opaque, ) +OPCODE(GetCarryFromOp, U1, Opaque, ) +OPCODE(GetOverflowFromOp, U1, Opaque, ) +OPCODE(GetSparseFromOp, U1, Opaque, ) // Floating-point operations -OPCODE(FPAbs16, F16, F16, ) -OPCODE(FPAbs32, F32, F32, ) -OPCODE(FPAbs64, F64, F64, ) -OPCODE(FPAdd16, F16, F16, F16, ) -OPCODE(FPAdd32, F32, F32, F32, ) -OPCODE(FPAdd64, F64, F64, F64, ) -OPCODE(FPFma16, F16, F16, F16, F16, ) -OPCODE(FPFma32, F32, F32, F32, F32, ) -OPCODE(FPFma64, F64, F64, F64, F64, ) -OPCODE(FPMax32, F32, F32, F32, ) -OPCODE(FPMax64, F64, F64, F64, ) -OPCODE(FPMin32, F32, F32, F32, ) -OPCODE(FPMin64, F64, F64, F64, ) -OPCODE(FPMul16, F16, F16, F16, ) -OPCODE(FPMul32, F32, F32, F32, ) -OPCODE(FPMul64, F64, F64, F64, ) -OPCODE(FPNeg16, F16, F16, ) -OPCODE(FPNeg32, F32, F32, ) -OPCODE(FPNeg64, F64, F64, ) -OPCODE(FPRecip32, F32, F32, ) -OPCODE(FPRecip64, F64, F64, ) -OPCODE(FPRecipSqrt32, F32, F32, ) -OPCODE(FPRecipSqrt64, F64, F64, ) -OPCODE(FPSqrt, F32, F32, ) -OPCODE(FPSin, F32, F32, ) -OPCODE(FPExp2, F32, F32, ) -OPCODE(FPCos, F32, F32, ) -OPCODE(FPLog2, F32, F32, ) -OPCODE(FPSaturate16, F16, F16, ) -OPCODE(FPSaturate32, F32, F32, ) -OPCODE(FPSaturate64, F64, F64, ) -OPCODE(FPRoundEven16, F16, F16, ) -OPCODE(FPRoundEven32, F32, F32, ) -OPCODE(FPRoundEven64, F64, F64, ) -OPCODE(FPFloor16, F16, F16, ) -OPCODE(FPFloor32, F32, F32, ) -OPCODE(FPFloor64, F64, F64, ) -OPCODE(FPCeil16, F16, F16, ) -OPCODE(FPCeil32, F32, F32, ) -OPCODE(FPCeil64, F64, F64, ) -OPCODE(FPTrunc16, F16, F16, ) -OPCODE(FPTrunc32, F32, F32, ) -OPCODE(FPTrunc64, F64, F64, ) +OPCODE(FPAbs16, F16, F16, ) +OPCODE(FPAbs32, F32, F32, ) +OPCODE(FPAbs64, F64, F64, ) +OPCODE(FPAdd16, F16, F16, F16, ) +OPCODE(FPAdd32, F32, F32, F32, ) +OPCODE(FPAdd64, F64, F64, F64, ) +OPCODE(FPFma16, F16, F16, F16, F16, ) +OPCODE(FPFma32, F32, F32, F32, F32, ) +OPCODE(FPFma64, F64, F64, F64, F64, ) +OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax64, F64, F64, F64, ) +OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin64, F64, F64, F64, ) +OPCODE(FPMul16, F16, F16, F16, ) +OPCODE(FPMul32, F32, F32, F32, ) +OPCODE(FPMul64, F64, F64, F64, ) +OPCODE(FPNeg16, F16, F16, ) +OPCODE(FPNeg32, F32, F32, ) +OPCODE(FPNeg64, F64, F64, ) +OPCODE(FPRecip32, F32, F32, ) +OPCODE(FPRecip64, F64, F64, ) +OPCODE(FPRecipSqrt32, F32, F32, ) +OPCODE(FPRecipSqrt64, F64, F64, ) +OPCODE(FPSqrt, F32, F32, ) +OPCODE(FPSin, F32, F32, ) +OPCODE(FPExp2, F32, F32, ) +OPCODE(FPCos, F32, F32, ) +OPCODE(FPLog2, F32, F32, ) +OPCODE(FPSaturate16, F16, F16, ) +OPCODE(FPSaturate32, F32, F32, ) +OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPRoundEven16, F16, F16, ) +OPCODE(FPRoundEven32, F32, F32, ) +OPCODE(FPRoundEven64, F64, F64, ) +OPCODE(FPFloor16, F16, F16, ) +OPCODE(FPFloor32, F32, F32, ) +OPCODE(FPFloor64, F64, F64, ) +OPCODE(FPCeil16, F16, F16, ) +OPCODE(FPCeil32, F32, F32, ) +OPCODE(FPCeil64, F64, F64, ) +OPCODE(FPTrunc16, F16, F16, ) +OPCODE(FPTrunc32, F32, F32, ) +OPCODE(FPTrunc64, F64, F64, ) -OPCODE(FPOrdEqual16, U1, F16, F16, ) -OPCODE(FPOrdEqual32, U1, F32, F32, ) -OPCODE(FPOrdEqual64, U1, F64, F64, ) -OPCODE(FPUnordEqual16, U1, F16, F16, ) -OPCODE(FPUnordEqual32, U1, F32, F32, ) -OPCODE(FPUnordEqual64, U1, F64, F64, ) -OPCODE(FPOrdNotEqual16, U1, F16, F16, ) -OPCODE(FPOrdNotEqual32, U1, F32, F32, ) -OPCODE(FPOrdNotEqual64, U1, F64, F64, ) -OPCODE(FPUnordNotEqual16, U1, F16, F16, ) -OPCODE(FPUnordNotEqual32, U1, F32, F32, ) -OPCODE(FPUnordNotEqual64, U1, F64, F64, ) -OPCODE(FPOrdLessThan16, U1, F16, F16, ) -OPCODE(FPOrdLessThan32, U1, F32, F32, ) -OPCODE(FPOrdLessThan64, U1, F64, F64, ) -OPCODE(FPUnordLessThan16, U1, F16, F16, ) -OPCODE(FPUnordLessThan32, U1, F32, F32, ) -OPCODE(FPUnordLessThan64, U1, F64, F64, ) -OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) -OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) -OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) -OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) -OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) -OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) -OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) -OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) -OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) -OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) -OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) -OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) -OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) -OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) -OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) -OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) -OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) -OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdEqual16, U1, F16, F16, ) +OPCODE(FPOrdEqual32, U1, F32, F32, ) +OPCODE(FPOrdEqual64, U1, F64, F64, ) +OPCODE(FPUnordEqual16, U1, F16, F16, ) +OPCODE(FPUnordEqual32, U1, F32, F32, ) +OPCODE(FPUnordEqual64, U1, F64, F64, ) +OPCODE(FPOrdNotEqual16, U1, F16, F16, ) +OPCODE(FPOrdNotEqual32, U1, F32, F32, ) +OPCODE(FPOrdNotEqual64, U1, F64, F64, ) +OPCODE(FPUnordNotEqual16, U1, F16, F16, ) +OPCODE(FPUnordNotEqual32, U1, F32, F32, ) +OPCODE(FPUnordNotEqual64, U1, F64, F64, ) +OPCODE(FPOrdLessThan16, U1, F16, F16, ) +OPCODE(FPOrdLessThan32, U1, F32, F32, ) +OPCODE(FPOrdLessThan64, U1, F64, F64, ) +OPCODE(FPUnordLessThan16, U1, F16, F16, ) +OPCODE(FPUnordLessThan32, U1, F32, F32, ) +OPCODE(FPUnordLessThan64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) +OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) // Integer operations -OPCODE(IAdd32, U32, U32, U32, ) -OPCODE(IAdd64, U64, U64, U64, ) -OPCODE(ISub32, U32, U32, U32, ) -OPCODE(ISub64, U64, U64, U64, ) -OPCODE(IMul32, U32, U32, U32, ) -OPCODE(INeg32, U32, U32, ) -OPCODE(INeg64, U64, U64, ) -OPCODE(IAbs32, U32, U32, ) -OPCODE(ShiftLeftLogical32, U32, U32, U32, ) -OPCODE(ShiftLeftLogical64, U64, U64, U32, ) -OPCODE(ShiftRightLogical32, U32, U32, U32, ) -OPCODE(ShiftRightLogical64, U64, U64, U32, ) -OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) -OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) -OPCODE(BitwiseAnd32, U32, U32, U32, ) -OPCODE(BitwiseOr32, U32, U32, U32, ) -OPCODE(BitwiseXor32, U32, U32, U32, ) -OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) -OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) -OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) -OPCODE(BitReverse32, U32, U32, ) -OPCODE(BitCount32, U32, U32, ) -OPCODE(BitwiseNot32, U32, U32, ) +OPCODE(IAdd32, U32, U32, U32, ) +OPCODE(IAdd64, U64, U64, U64, ) +OPCODE(ISub32, U32, U32, U32, ) +OPCODE(ISub64, U64, U64, U64, ) +OPCODE(IMul32, U32, U32, U32, ) +OPCODE(INeg32, U32, U32, ) +OPCODE(INeg64, U64, U64, ) +OPCODE(IAbs32, U32, U32, ) +OPCODE(ShiftLeftLogical32, U32, U32, U32, ) +OPCODE(ShiftLeftLogical64, U64, U64, U32, ) +OPCODE(ShiftRightLogical32, U32, U32, U32, ) +OPCODE(ShiftRightLogical64, U64, U64, U32, ) +OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) +OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) +OPCODE(BitwiseAnd32, U32, U32, U32, ) +OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseXor32, U32, U32, U32, ) +OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) +OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) +OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) +OPCODE(BitReverse32, U32, U32, ) +OPCODE(BitCount32, U32, U32, ) +OPCODE(BitwiseNot32, U32, U32, ) -OPCODE(FindSMsb32, U32, U32, ) -OPCODE(FindUMsb32, U32, U32, ) -OPCODE(SMin32, U32, U32, U32, ) -OPCODE(UMin32, U32, U32, U32, ) -OPCODE(SMax32, U32, U32, U32, ) -OPCODE(UMax32, U32, U32, U32, ) -OPCODE(SLessThan, U1, U32, U32, ) -OPCODE(ULessThan, U1, U32, U32, ) -OPCODE(IEqual, U1, U32, U32, ) -OPCODE(SLessThanEqual, U1, U32, U32, ) -OPCODE(ULessThanEqual, U1, U32, U32, ) -OPCODE(SGreaterThan, U1, U32, U32, ) -OPCODE(UGreaterThan, U1, U32, U32, ) -OPCODE(INotEqual, U1, U32, U32, ) -OPCODE(SGreaterThanEqual, U1, U32, U32, ) -OPCODE(UGreaterThanEqual, U1, U32, U32, ) +OPCODE(FindSMsb32, U32, U32, ) +OPCODE(FindUMsb32, U32, U32, ) +OPCODE(SMin32, U32, U32, U32, ) +OPCODE(UMin32, U32, U32, U32, ) +OPCODE(SMax32, U32, U32, U32, ) +OPCODE(UMax32, U32, U32, U32, ) +OPCODE(SLessThan, U1, U32, U32, ) +OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(IEqual, U1, U32, U32, ) +OPCODE(SLessThanEqual, U1, U32, U32, ) +OPCODE(ULessThanEqual, U1, U32, U32, ) +OPCODE(SGreaterThan, U1, U32, U32, ) +OPCODE(UGreaterThan, U1, U32, U32, ) +OPCODE(INotEqual, U1, U32, U32, ) +OPCODE(SGreaterThanEqual, U1, U32, U32, ) +OPCODE(UGreaterThanEqual, U1, U32, U32, ) // Logical operations -OPCODE(LogicalOr, U1, U1, U1, ) -OPCODE(LogicalAnd, U1, U1, U1, ) -OPCODE(LogicalXor, U1, U1, U1, ) -OPCODE(LogicalNot, U1, U1, ) +OPCODE(LogicalOr, U1, U1, U1, ) +OPCODE(LogicalAnd, U1, U1, U1, ) +OPCODE(LogicalXor, U1, U1, U1, ) +OPCODE(LogicalNot, U1, U1, ) // Conversion operations -OPCODE(ConvertS16F16, U32, F16, ) -OPCODE(ConvertS16F32, U32, F32, ) -OPCODE(ConvertS16F64, U32, F64, ) -OPCODE(ConvertS32F16, U32, F16, ) -OPCODE(ConvertS32F32, U32, F32, ) -OPCODE(ConvertS32F64, U32, F64, ) -OPCODE(ConvertS64F16, U64, F16, ) -OPCODE(ConvertS64F32, U64, F32, ) -OPCODE(ConvertS64F64, U64, F64, ) -OPCODE(ConvertU16F16, U32, F16, ) -OPCODE(ConvertU16F32, U32, F32, ) -OPCODE(ConvertU16F64, U32, F64, ) -OPCODE(ConvertU32F16, U32, F16, ) -OPCODE(ConvertU32F32, U32, F32, ) -OPCODE(ConvertU32F64, U32, F64, ) -OPCODE(ConvertU64F16, U64, F16, ) -OPCODE(ConvertU64F32, U64, F32, ) -OPCODE(ConvertU64F64, U64, F64, ) -OPCODE(ConvertU64U32, U64, U32, ) -OPCODE(ConvertU32U64, U32, U64, ) -OPCODE(ConvertF16F32, F16, F32, ) -OPCODE(ConvertF32F16, F32, F16, ) -OPCODE(ConvertF32F64, F32, F64, ) -OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertS16F16, U32, F16, ) +OPCODE(ConvertS16F32, U32, F32, ) +OPCODE(ConvertS16F64, U32, F64, ) +OPCODE(ConvertS32F16, U32, F16, ) +OPCODE(ConvertS32F32, U32, F32, ) +OPCODE(ConvertS32F64, U32, F64, ) +OPCODE(ConvertS64F16, U64, F16, ) +OPCODE(ConvertS64F32, U64, F32, ) +OPCODE(ConvertS64F64, U64, F64, ) +OPCODE(ConvertU16F16, U32, F16, ) +OPCODE(ConvertU16F32, U32, F32, ) +OPCODE(ConvertU16F64, U32, F64, ) +OPCODE(ConvertU32F16, U32, F16, ) +OPCODE(ConvertU32F32, U32, F32, ) +OPCODE(ConvertU32F64, U32, F64, ) +OPCODE(ConvertU64F16, U64, F16, ) +OPCODE(ConvertU64F32, U64, F32, ) +OPCODE(ConvertU64F64, U64, F64, ) +OPCODE(ConvertU64U32, U64, U32, ) +OPCODE(ConvertU32U64, U32, U64, ) +OPCODE(ConvertF16F32, F16, F32, ) +OPCODE(ConvertF32F16, F32, F16, ) +OPCODE(ConvertF32F64, F32, F64, ) +OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertF16S32, F16, U32, ) +OPCODE(ConvertF16S64, F16, U64, ) +OPCODE(ConvertF16U32, F16, U32, ) +OPCODE(ConvertF16U64, F16, U64, ) +OPCODE(ConvertF32S32, F32, U32, ) +OPCODE(ConvertF32S64, F32, U64, ) +OPCODE(ConvertF32U32, F32, U32, ) +OPCODE(ConvertF32U64, F32, U64, ) +OPCODE(ConvertF64S32, F64, U32, ) +OPCODE(ConvertF64S64, F64, U64, ) +OPCODE(ConvertF64U32, F64, U32, ) +OPCODE(ConvertF64U64, F64, U64, ) + +// Image operations +OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 771094eb9..8fea05f7b 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -293,6 +293,17 @@ constexpr size_t NUM_REGS = 256; return reg + (-num); } +[[nodiscard]] constexpr Reg operator++(Reg& reg) { + reg = reg + 1; + return reg; +} + +[[nodiscard]] constexpr Reg operator++(Reg& reg, int) { + const Reg copy{reg}; + reg = reg + 1; + return copy; +} + [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { return static_cast(reg); } diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 9b7e1480b..3602883d6 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -75,6 +75,7 @@ private: f64 imm_f64; }; }; +static_assert(std::is_trivially_copyable_v); template class TypedValue : public Value { diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 5d0b91598..f2a2ff331 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -249,8 +249,8 @@ INST(SULD, "SULD", "1110 1011 000- ----") INST(SURED, "SURED", "1110 1011 010- ----") INST(SUST, "SUST", "1110 1011 001- ----") INST(SYNC, "SYNC", "1111 0000 1111 1---") -INST(TEX, "TEX", "1100 00-- --11 1---") -INST(TEX_b, "TEX (b)", "1101 1110 1011 1---") +INST(TEX, "TEX", "1100 0--- ---- ----") +INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") INST(TLD, "TLD", "1101 1100 --11 1---") INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index dbfc04f75..b270bbccd 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -62,6 +62,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Blod : u64 { + None, + LZ, + LB, + LL, + INVALIDBLOD4, + INVALIDBLOD5, + LBA, + LLA, +}; + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + switch (type) { + case TextureType::_1D: + return v.F(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1)); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2)); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { + switch (blod) { + case Blod::None: + return v.ir.Imm32(0.0f); + case Blod::LZ: + return v.ir.Imm32(0.0f); + case Blod::LB: + case Blod::LL: + case Blod::LBA: + case Blod::LLA: + return v.F(reg++); + case Blod::INVALIDBLOD4: + case Blod::INVALIDBLOD5: + break; + } + throw NotImplementedException("Invalid blod {}", blod); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +bool HasExplicitLod(Blod blod) { + switch (blod) { + case Blod::LL: + case Blod::LLA: + case Blod::LZ: + return true; + default: + return false; + } +} + +void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, + std::optional cbuf_offset) { + union { + u64 raw; + BitField<35, 1, u64> ndv; + BitField<49, 1, u64> nodep; + BitField<50, 1, u64> dc; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + } const tex{insn}; + + if (lc) { + throw NotImplementedException("LC"); + } + const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; + + IR::Reg meta_reg{tex.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::F32 dref; + IR::F32 lod_clamp; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(meta_reg++); + } + const IR::F32 lod{MakeLod(v, meta_reg, blod)}; + if (aoffi) { + offset = MakeOffset(v, meta_reg, tex.type); + } + if (tex.dc != 0) { + dref = v.F(meta_reg++); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); + info.has_lod_clamp.Assign(lc ? 1 : 0); + + const IR::Value sample{[&]() -> IR::Value { + if (tex.dc == 0) { + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info); + } else { + return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); + } + } + if (HasExplicitLod(blod)) { + return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } else { + return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, + info); + } + }()}; + + for (int element = 0; element < 4; ++element) { + if (((tex.mask >> element) & 1) == 0) { + continue; + } + IR::F32 value; + if (tex.dc != 0) { + value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); + } else { + value = IR::F32{v.ir.CompositeExtract(sample, element)}; + } + v.F(tex.dest_reg + element, value); + } + if (tex.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TEX(u64 insn) { + union { + u64 raw; + BitField<54, 1, u64> aoffi; + BitField<55, 3, Blod> blod; + BitField<58, 1, u64> lc; + BitField<36, 13, u64> cbuf_offset; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); +} + +void TranslatorVisitor::TEX_b(u64 insn) { + union { + u64 raw; + BitField<36, 1, u64> aoffi; + BitField<37, 3, Blod> blod; + BitField<40, 1, u64> lc; + } const tex{insn}; + + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6662ef4cd..960beadd4 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -82,6 +82,25 @@ void VisitUsages(Info& info, IR::Inst& inst) { throw NotImplementedException("Constant buffer with non-immediate index"); } break; + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::ImageSampleImplicitLod: + case IR::Opcode::ImageSampleExplicitLod: + case IR::Opcode::ImageSampleDrefImplicitLod: + case IR::Opcode::ImageSampleDrefExplicitLod: { + const TextureType type{inst.Flags().type}; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || + type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } default: break; } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 965e52135..2625c0bb2 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -226,6 +226,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, } // Reversed loops are more likely to find the right result for (size_t arg = inst->NumArgs(); arg--;) { + IR::Block* inst_block{block}; if (inst->Opcode() == IR::Opcode::Phi) { // If we are going through a phi node, mark the current block as visited visited.insert(block); @@ -235,15 +236,11 @@ std::optional Track(IR::Block* block, const IR::Value& value, // Already visited, skip continue; } - const std::optional storage_buffer{Track(phi_block, inst->Arg(arg), bias, visited)}; - if (storage_buffer) { - return *storage_buffer; - } - } else { - const std::optional storage_buffer{Track(block, inst->Arg(arg), bias, visited)}; - if (storage_buffer) { - return *storage_buffer; - } + inst_block = phi_block; + } + const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)}; + if (storage_buffer) { + return *storage_buffer; } } return std::nullopt; diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 38106308c..3b7e7306b 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -6,6 +6,7 @@ #include +#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" #include "shader_recompiler/frontend/ir/program.h" @@ -26,6 +27,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); void LowerFp16ToFp32(IR::Program& program); void SsaRewritePass(std::span post_order_blocks); +void TexturePass(Environment& env, IR::Program& program); void VerificationPass(const IR::Function& function); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp new file mode 100644 index 000000000..80e4ad6a9 --- /dev/null +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -0,0 +1,199 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include +#include + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/ir_opt/passes.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Optimization { +namespace { +struct ConstBufferAddr { + u32 index; + u32 offset; +}; + +struct TextureInst { + ConstBufferAddr cbuf; + IR::Inst* inst; + IR::Block* block; +}; + +using TextureInstVector = boost::container::small_vector; + +using VisitedBlocks = boost::container::flat_set, + boost::container::small_vector>; + +IR::Opcode IndexedInstruction(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleImplicitLod: + return IR::Opcode::ImageSampleImplicitLod; + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + return IR::Opcode::ImageSampleExplicitLod; + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + return IR::Opcode::ImageSampleDrefImplicitLod; + case IR::Opcode::BoundImageSampleDrefExplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + return IR::Opcode::ImageSampleDrefExplicitLod; + default: + return IR::Opcode::Void; + } +} + +bool IsBindless(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::BindlessImageSampleImplicitLod: + case IR::Opcode::BindlessImageSampleExplicitLod: + case IR::Opcode::BindlessImageSampleDrefImplicitLod: + case IR::Opcode::BindlessImageSampleDrefExplicitLod: + return true; + case IR::Opcode::BoundImageSampleImplicitLod: + case IR::Opcode::BoundImageSampleExplicitLod: + case IR::Opcode::BoundImageSampleDrefImplicitLod: + case IR::Opcode::BoundImageSampleDrefExplicitLod: + return false; + default: + throw InvalidArgument("Invalid opcode {}", inst.Opcode()); + } +} + +bool IsTextureInstruction(const IR::Inst& inst) { + return IndexedInstruction(inst) != IR::Opcode::Void; +} + +std::optional Track(IR::Block* block, const IR::Value& value, + VisitedBlocks& visited) { + if (value.IsImmediate()) { + // Immediates can't be a storage buffer + return std::nullopt; + } + const IR::Inst* const inst{value.InstRecursive()}; + if (inst->Opcode() == IR::Opcode::GetCbuf) { + const IR::Value index{inst->Arg(0)}; + const IR::Value offset{inst->Arg(1)}; + if (!index.IsImmediate()) { + // Reading a bindless texture from variable indices is valid + // but not supported here at the moment + return std::nullopt; + } + if (!offset.IsImmediate()) { + // TODO: Support arrays of textures + return std::nullopt; + } + return ConstBufferAddr{ + .index{index.U32()}, + .offset{offset.U32()}, + }; + } + // Reversed loops are more likely to find the right result + for (size_t arg = inst->NumArgs(); arg--;) { + IR::Block* inst_block{block}; + if (inst->Opcode() == IR::Opcode::Phi) { + // If we are going through a phi node, mark the current block as visited + visited.insert(block); + // and skip already visited blocks to avoid looping forever + IR::Block* const phi_block{inst->PhiBlock(arg)}; + if (visited.contains(phi_block)) { + // Already visited, skip + continue; + } + inst_block = phi_block; + } + const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), visited)}; + if (storage_buffer) { + return *storage_buffer; + } + } + return std::nullopt; +} + +TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { + ConstBufferAddr addr; + if (IsBindless(inst)) { + VisitedBlocks visited; + const std::optional track_addr{Track(block, IR::Value{&inst}, visited)}; + if (!track_addr) { + throw NotImplementedException("Failed to track bindless texture constant buffer"); + } + addr = *track_addr; + } else { + addr = ConstBufferAddr{ + .index{env.TextureBoundBuffer()}, + .offset{inst.Arg(0).U32()}, + }; + } + return TextureInst{ + .cbuf{addr}, + .inst{&inst}, + .block{block}, + }; +} + +class Descriptors { +public: + explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {} + + u32 Add(const TextureDescriptor& descriptor) { + // TODO: Handle arrays + auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) { + return descriptor.cbuf_index == existing.cbuf_index && + descriptor.cbuf_offset == existing.cbuf_offset && + descriptor.type == existing.type; + })}; + if (it != descriptors.end()) { + return static_cast(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(descriptor); + return static_cast(descriptors.size()) - 1; + } + +private: + TextureDescriptors& descriptors; +}; +} // Anonymous namespace + +void TexturePass(Environment& env, IR::Program& program) { + TextureInstVector to_replace; + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsTextureInstruction(inst)) { + continue; + } + to_replace.push_back(MakeInst(env, block, inst)); + } + } + } + // Sort instructions to visit textures by constant buffer index, then by offset + std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) { + return lhs.cbuf.offset < rhs.cbuf.offset; + }); + std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { + return lhs.cbuf.index < rhs.cbuf.index; + }); + Descriptors descriptors{program.info.texture_descriptors}; + for (TextureInst& texture_inst : to_replace) { + // TODO: Handle arrays + IR::Inst* const inst{texture_inst.inst}; + const u32 index{descriptors.Add(TextureDescriptor{ + .type{inst->Flags().type}, + .cbuf_index{texture_inst.cbuf.index}, + .cbuf_offset{texture_inst.cbuf.offset}, + .count{1}, + })}; + inst->ReplaceOpcode(IndexedInstruction(*inst)); + inst->SetArg(0, IR::Value{index}); + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 8766bf13e..103a2f0b4 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -8,25 +8,51 @@ #include "common/common_types.h" +#include #include namespace Shader { +enum class TextureType : u32 { + Color1D, + ColorArray1D, + Color2D, + ColorArray2D, + Color3D, + ColorCube, + ColorArrayCube, + Shadow1D, + ShadowArray1D, + Shadow2D, + ShadowArray2D, + Shadow3D, + ShadowCube, + ShadowArrayCube, +}; + +struct TextureDescriptor { + TextureType type; + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; +using TextureDescriptors = boost::container::small_vector; + +struct ConstantBufferDescriptor { + u32 index; + u32 count; +}; + +struct StorageBufferDescriptor { + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; + struct Info { static constexpr size_t MAX_CBUFS{18}; static constexpr size_t MAX_SSBOS{16}; - struct ConstantBufferDescriptor { - u32 index; - u32 count; - }; - - struct StorageBufferDescriptor { - u32 cbuf_index; - u32 cbuf_offset; - u32 count; - }; - bool uses_workgroup_id{}; bool uses_local_invocation_id{}; bool uses_fp16{}; @@ -35,12 +61,16 @@ struct Info { bool uses_fp16_denorms_preserve{}; bool uses_fp32_denorms_flush{}; bool uses_fp32_denorms_preserve{}; + bool uses_image_1d{}; + bool uses_sampled_1d{}; + bool uses_sparse_residency{}; u32 constant_buffer_mask{}; boost::container::static_vector constant_buffer_descriptors; boost::container::static_vector storage_buffers_descriptors; + TextureDescriptors texture_descriptors; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index a658a3276..ef8bef6ff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -40,6 +40,16 @@ vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Sh }); ++binding; } + for (const auto& desc : info.texture_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } return device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -79,6 +89,18 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( ++binding; offset += sizeof(DescriptorUpdateEntry); } + for (const auto& desc : info.texture_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, .pNext = nullptr, @@ -92,6 +114,44 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( .set = 0, }); } + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + + u32 image; + u32 sampler; +}; + +VideoCommon::ImageViewType CastType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Shadow1D: + return VideoCommon::ImageViewType::e1D; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ShadowArray1D: + return VideoCommon::ImageViewType::e1DArray; + case Shader::TextureType::Color2D: + case Shader::TextureType::Shadow2D: + return VideoCommon::ImageViewType::e2D; + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::ShadowArray2D: + return VideoCommon::ImageViewType::e2DArray; + case Shader::TextureType::Color3D: + case Shader::TextureType::Shadow3D: + return VideoCommon::ImageViewType::e3D; + case Shader::TextureType::ColorCube: + case Shader::TextureType::ShadowCube: + return VideoCommon::ImageViewType::Cube; + case Shader::TextureType::ColorArrayCube: + case Shader::TextureType::ShadowArrayCube: + return VideoCommon::ImageViewType::CubeArray; + } + UNREACHABLE_MSG("Invalid texture type {}", type); +} } // Anonymous namespace ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, @@ -143,6 +203,47 @@ void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { buffer_cache.BindHostComputeBuffers(); } +void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, + TextureCache& texture_cache) { + texture_cache.SynchronizeComputeDescriptors(); + + static constexpr size_t max_elements = 64; + std::array image_view_ids; + boost::container::static_vector image_view_indices; + boost::container::static_vector sampler_handles; + + const auto& launch_desc{kepler_compute.launch_description}; + const auto& cbufs{launch_desc.const_buffer_config}; + const bool via_header_index{launch_desc.linked_tsc}; + for (const auto& desc : info.texture_descriptors) { + const u32 cbuf_index{desc.cbuf_index}; + const u32 cbuf_offset{desc.cbuf_offset}; + ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); + + const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; + const u32 raw_handle{gpu_memory.Read(addr)}; + + const TextureHandle handle(raw_handle, via_header_index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + sampler_handles.push_back(sampler->Handle()); + } + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + size_t index{}; + for (const auto& desc : info.texture_descriptors) { + const VkSampler vk_sampler{sampler_handles[index]}; + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); + ++index; + } +} + VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index dc045d524..08d73a2a4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -6,9 +6,11 @@ #include "common/common_types.h" #include "shader_recompiler/shader_info.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -30,6 +32,8 @@ public: ComputePipeline(const ComputePipeline&) = delete; void ConfigureBufferCache(BufferCache& buffer_cache); + void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache); [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2497c2385..bcb7dd2eb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -76,6 +76,10 @@ public: return gpu_memory.Read(program_base + address); } + u32 TextureBoundBuffer() override { + return kepler_compute.regs.tex_cb_index; + } + std::array WorkgroupSize() override { const auto& qmd{kepler_compute.launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1b662f9f3..c94419d29 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -241,9 +241,10 @@ void RasterizerVulkan::DispatchCompute() { if (!pipeline) { return; } - std::scoped_lock lock{buffer_cache.mutex}; + std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; update_descriptor_queue.Acquire(); pipeline->ConfigureBufferCache(buffer_cache); + pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache); const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; const auto& qmd{kepler_compute.launch_description}; -- cgit v1.2.3 From b9f7bf4472b8e0a5aad1aec3a5ff5bb56470bfff Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 14 Mar 2021 01:51:40 -0500 Subject: spirv: Add SignedZeroInfNanPreserve logic --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 6 ++++++ src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 ++++ 3 files changed, 12 insertions(+) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index a94e9cb2d..c7cba6279 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -124,6 +124,12 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } ctx.AddExtension("SPV_KHR_float_controls"); + if (info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); + } + if (profile.support_fp32_signed_zero_nan_preserve) { + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); + } if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); } else if (info.uses_fp32_denorms_flush) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 917fc1251..c6a143598 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -15,6 +15,8 @@ struct Profile { bool support_fp32_denorm_preserve{}; bool support_fp16_denorm_flush{}; bool support_fp32_denorm_flush{}; + bool support_fp16_signed_zero_nan_preserve{}; + bool support_fp32_signed_zero_nan_preserve{}; // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index bcb7dd2eb..5477a2903 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -193,6 +193,10 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .has_broken_spirv_clamp = true, // TODO: is_intel }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; -- cgit v1.2.3 From 260743f371236f7c57b01334b1c3474b15a47c39 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Mar 2021 19:28:31 -0300 Subject: shader: Add partial rasterizer integration --- src/shader_recompiler/CMakeLists.txt | 4 +- .../backend/spirv/emit_context.cpp | 64 ++- src/shader_recompiler/backend/spirv/emit_context.h | 18 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 44 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 18 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 55 ++- .../backend/spirv/emit_spirv_control_flow.cpp | 23 +- src/shader_recompiler/environment.h | 14 + src/shader_recompiler/frontend/ir/attribute.cpp | 2 +- src/shader_recompiler/frontend/ir/attribute.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 14 + src/shader_recompiler/frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 3 + src/shader_recompiler/frontend/ir/opcodes.inc | 11 +- src/shader_recompiler/frontend/ir/program.h | 2 + src/shader_recompiler/frontend/ir/reg.h | 4 +- .../frontend/maxwell/control_flow.cpp | 31 +- .../frontend/maxwell/control_flow.h | 3 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../frontend/maxwell/structured_control_flow.cpp | 18 + .../frontend/maxwell/translate/impl/exit.cpp | 15 - .../maxwell/translate/impl/exit_program.cpp | 43 ++ .../frontend/maxwell/translate/impl/impl.h | 4 +- .../translate/impl/load_store_attribute.cpp | 86 +++- .../maxwell/translate/impl/not_implemented.cpp | 16 +- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../ir_opt/collect_shader_info_pass.cpp | 60 ++- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 2 +- src/shader_recompiler/program_header.h | 143 +++++++ src/shader_recompiler/recompiler.cpp | 28 -- src/shader_recompiler/recompiler.h | 20 - src/shader_recompiler/shader_info.h | 10 + src/shader_recompiler/stage.h | 19 + src/video_core/CMakeLists.txt | 6 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 4 + .../renderer_vulkan/fixed_pipeline_state.h | 9 +- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 24 ++ src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 + src/video_core/renderer_vulkan/pipeline_helper.h | 162 ++++++++ .../renderer_vulkan/vk_compute_pipeline.cpp | 209 ++-------- .../renderer_vulkan/vk_compute_pipeline.h | 3 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 445 +++++++++++++++++++++ .../renderer_vulkan/vk_graphics_pipeline.h | 66 +++ src/video_core/renderer_vulkan/vk_pipeline.h | 36 -- .../renderer_vulkan/vk_pipeline_cache.cpp | 346 ++++++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 82 +++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 47 ++- src/video_core/renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_render_pass_cache.cpp | 100 +++++ .../renderer_vulkan/vk_render_pass_cache.h | 53 +++ .../renderer_vulkan/vk_texture_cache.cpp | 68 +--- src/video_core/renderer_vulkan/vk_texture_cache.h | 29 +- src/video_core/vulkan_common/vulkan_device.cpp | 15 + 54 files changed, 1927 insertions(+), 566 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp create mode 100644 src/shader_recompiler/program_header.h delete mode 100644 src/shader_recompiler/recompiler.cpp delete mode 100644 src/shader_recompiler/recompiler.h create mode 100644 src/shader_recompiler/stage.h create mode 100644 src/video_core/renderer_vulkan/pipeline_helper.h create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_vulkan/vk_graphics_pipeline.h delete mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.cpp create mode 100644 src/video_core/renderer_vulkan/vk_render_pass_cache.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b870e9937..31c394106 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/common_funcs.h frontend/maxwell/translate/impl/condition_code_set.cpp frontend/maxwell/translate/impl/double_add.cpp + frontend/maxwell/translate/impl/exit_program.cpp frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -121,9 +122,8 @@ add_library(shader_recompiler STATIC ir_opt/texture_pass.cpp ir_opt/verification_pass.cpp object_pool.h + program_header.h profile.h - recompiler.cpp - recompiler.h shader_info.h ) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 204389d74..6c79b611b 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -62,18 +62,15 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(const Profile& profile_, IR::Program& program) +EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) : Sirit::Module(0x00010000), profile{profile_} { AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); - DefineSpecialVariables(program.info); - - u32 binding{}; + DefineInterfaces(program.info, program.stage); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextures(program.info, binding); - DefineLabels(program); } @@ -96,6 +93,8 @@ Id EmitContext::Def(const IR::Value& value) { return Constant(F32[1], value.F32()); case IR::Type::F64: return Constant(F64[1], value.F64()); + case IR::Type::Label: + return value.Label()->Definition(); default: throw NotImplementedException("Immediate type {}", value.Type()); } @@ -109,6 +108,9 @@ void EmitContext::DefineCommonTypes(const Info& info) { F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); + if (info.uses_int8) { AddCapability(spv::Capability::Int8); U8 = Name(TypeInt(8, false), "u8"); @@ -139,15 +141,20 @@ void EmitContext::DefineCommonConstants() { u32_zero_value = Constant(U32[1], 0U); } -void EmitContext::DefineSpecialVariables(const Info& info) { - const auto define{[this](Id type, spv::BuiltIn builtin, spv::StorageClass storage_class) { - const Id pointer_type{TypePointer(storage_class, type)}; - const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::Input)}; - Decorate(id, spv::Decoration::BuiltIn, builtin); - return id; - }}; +void EmitContext::DefineInterfaces(const Info& info, Stage stage) { + const auto define{ + [this](Id type, std::optional builtin, spv::StorageClass storage_class) { + const Id pointer_type{TypePointer(storage_class, type)}; + const Id id{AddGlobalVariable(pointer_type, storage_class)}; + if (builtin) { + Decorate(id, spv::Decoration::BuiltIn, *builtin); + } + interfaces.push_back(id); + return id; + }}; using namespace std::placeholders; const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)}; + const auto define_output{std::bind(define, _1, _2, spv::StorageClass::Output)}; if (info.uses_workgroup_id) { workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId); @@ -155,6 +162,39 @@ void EmitContext::DefineSpecialVariables(const Info& info) { if (info.uses_local_invocation_id) { local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId); } + if (info.loads_position) { + const bool is_fragment{stage != Stage::Fragment}; + const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; + input_position = define_input(F32[4], built_in); + } + for (size_t i = 0; i < info.loads_generics.size(); ++i) { + if (info.loads_generics[i]) { + // FIXME: Declare size from input + input_generics[i] = define_input(F32[4], std::nullopt); + Decorate(input_generics[i], spv::Decoration::Location, static_cast(i)); + Name(input_generics[i], fmt::format("in_attr{}", i)); + } + } + if (info.stores_position) { + output_position = define_output(F32[4], spv::BuiltIn::Position); + } + for (size_t i = 0; i < info.stores_generics.size(); ++i) { + if (info.stores_generics[i]) { + output_generics[i] = define_output(F32[4], std::nullopt); + Decorate(output_generics[i], spv::Decoration::Location, static_cast(i)); + Name(output_generics[i], fmt::format("out_attr{}", i)); + } + } + if (stage == Stage::Fragment) { + for (size_t i = 0; i < 8; ++i) { + if (!info.stores_frag_color[i]) { + continue; + } + frag_color[i] = define_output(F32[4], std::nullopt); + Decorate(frag_color[i], spv::Decoration::Location, static_cast(i)); + Name(frag_color[i], fmt::format("frag_color{}", i)); + } + } } void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 35eca258a..2d7961ac3 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -46,7 +46,7 @@ struct UniformDefinitions { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, IR::Program& program); + explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); @@ -71,6 +71,9 @@ public: UniformDefinitions uniform_types; + Id input_f32{}; + Id output_f32{}; + Id storage_u32{}; std::array cbufs{}; @@ -80,10 +83,21 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id input_position{}; + std::array input_generics{}; + + Id output_position{}; + std::array output_generics{}; + + std::array frag_color{}; + Id frag_depth {}; + + std::vector interfaces; + private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); - void DefineSpecialVariables(const Info& info); + void DefineInterfaces(const Info& info, Stage stage); void DefineConstantBuffers(const Info& info, u32& binding); void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, Id type, char type_char, u32 element_size); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 50c0f7243..b8978b94a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -54,6 +54,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.U32(); } else if constexpr (std::is_same_v) { return arg.Label(); + } else if constexpr (std::is_same_v) { + return arg.Attribute(); } } @@ -197,8 +199,9 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { - EmitContext ctx{profile, program}; +std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program, + u32& binding) { + EmitContext ctx{profile, program, binding}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; const Id func{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; for (IR::Block* const block : program.blocks) { @@ -208,28 +211,41 @@ std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program } } ctx.OpFunctionEnd(); - boost::container::small_vector interfaces; - const Info& info{program.info}; - if (info.uses_workgroup_id) { - interfaces.push_back(ctx.workgroup_id); + + const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); + spv::ExecutionModel execution_model{}; + switch (env.ShaderStage()) { + case Shader::Stage::Compute: { + const std::array workgroup_size{env.WorkgroupSize()}; + execution_model = spv::ExecutionModel::GLCompute; + ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], + workgroup_size[1], workgroup_size[2]); + break; } - if (info.uses_local_invocation_id) { - interfaces.push_back(ctx.local_invocation_id); + case Shader::Stage::VertexB: + execution_model = spv::ExecutionModel::Vertex; + break; + case Shader::Stage::Fragment: + execution_model = spv::ExecutionModel::Fragment; + ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); + break; + default: + throw NotImplementedException("Stage {}", env.ShaderStage()); } - const std::span interfaces_span(interfaces.data(), interfaces.size()); - ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span); - - const std::array workgroup_size{env.WorkgroupSize()}; - ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], - workgroup_size[2]); + ctx.AddEntryPoint(execution_model, func, "main", interfaces); SetupDenormControl(profile, program, ctx, func); + const Info& info{program.info}; if (info.uses_sampled_1d) { ctx.AddCapability(spv::Capability::Sampled1D); } if (info.uses_sparse_residency) { ctx.AddCapability(spv::Capability::SparseResidency); } + if (info.uses_demote_to_helper_invocation) { + ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); + ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 89566c83d..ae121f534 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -16,18 +16,18 @@ namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, Environment& env, - IR::Program& program); + IR::Program& program, u32& binding); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranch(EmitContext& ctx, IR::Block* label); -void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label); -void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); -void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); +void EmitBranch(EmitContext& ctx, Id label); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); void EmitGetPred(EmitContext& ctx); @@ -41,10 +41,12 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetAttribute(EmitContext& ctx); -void EmitSetAttribute(EmitContext& ctx); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value); void EmitGetAttributeIndexed(EmitContext& ctx); void EmitSetAttributeIndexed(EmitContext& ctx); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitGetZFlag(EmitContext& ctx); void EmitGetSFlag(EmitContext& ctx); void EmitGetCFlag(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 125b58cf7..02d115740 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -5,6 +5,43 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { +namespace { +Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) { + const u32 element{static_cast(attr) % 4}; + const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + return ctx.OpAccessChain(ctx.input_f32, ctx.input_generics.at(index), element_id()); + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id()); + default: + throw NotImplementedException("Read attribute {}", attr); + } +} + +Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { + const u32 element{static_cast(attr) % 4}; + const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id()); + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id()); + default: + throw NotImplementedException("Read attribute {}", attr); + } +} +} // Anonymous namespace void EmitGetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); @@ -87,12 +124,12 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset); } -void EmitGetAttribute(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { + return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr)); } -void EmitSetAttribute(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { + ctx.OpStore(OutputAttrPointer(ctx, attr), value); } void EmitGetAttributeIndexed(EmitContext&) { @@ -103,6 +140,16 @@ void EmitSetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { + const Id component_id{ctx.Constant(ctx.U32[1], component)}; + const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; + ctx.OpStore(pointer, value); +} + +void EmitSetFragDepth(EmitContext& ctx, Id value) { + ctx.OpStore(ctx.frag_depth, value); +} + void EmitGetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 48755b827..6b81f0169 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -6,26 +6,29 @@ namespace Shader::Backend::SPIRV { -void EmitBranch(EmitContext& ctx, IR::Block* label) { - ctx.OpBranch(label->Definition()); +void EmitBranch(EmitContext& ctx, Id label) { + ctx.OpBranch(label); } -void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label) { - ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label) { + ctx.OpBranchConditional(condition, true_label, false_label); } -void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { - ctx.OpLoopMerge(merge_label->Definition(), continue_label->Definition(), - spv::LoopControlMask::MaskNone); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label) { + ctx.OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); } -void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { - ctx.OpSelectionMerge(merge_label->Definition(), spv::SelectionControlMask::MaskNone); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label) { + ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); } void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) { + ctx.OpDemoteToHelperInvocationEXT(); + ctx.OpBranch(continue_label); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0fcb68050..1fcaa56dd 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -3,6 +3,8 @@ #include #include "common/common_types.h" +#include "shader_recompiler/stage.h" +#include "shader_recompiler/program_header.h" namespace Shader { @@ -15,6 +17,18 @@ public: [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; [[nodiscard]] virtual std::array WorkgroupSize() = 0; + + [[nodiscard]] const ProgramHeader& SPH() const noexcept { + return sph; + } + + [[nodiscard]] Stage ShaderStage() const noexcept { + return stage; + } + +protected: + ProgramHeader sph{}; + Stage stage{}; }; } // namespace Shader diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 2fb7d576f..4811242ea 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -13,7 +13,7 @@ bool IsGeneric(Attribute attribute) noexcept { return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X; } -int GenericAttributeIndex(Attribute attribute) { +u32 GenericAttributeIndex(Attribute attribute) { if (!IsGeneric(attribute)) { throw InvalidArgument("Attribute is not generic {}", attribute); } diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index bb2cad6af..34ec7e0cd 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -224,7 +224,7 @@ enum class Attribute : u64 { [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; -[[nodiscard]] int GenericAttributeIndex(Attribute attribute); +[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); [[nodiscard]] std::string NameOf(Attribute attribute); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 958282160..672836c0b 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -82,6 +82,12 @@ void IREmitter::Return() { Inst(Opcode::Return); } +void IREmitter::DemoteToHelperInvocation(Block* continue_label) { + block->SetBranch(continue_label); + continue_label->AddImmediatePredecessor(block); + Inst(Opcode::DemoteToHelperInvocation, continue_label); +} + U32 IREmitter::GetReg(IR::Reg reg) { return Inst(Opcode::GetRegister, reg); } @@ -248,6 +254,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { Inst(Opcode::SetAttribute, attribute, value); } +void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { + Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); +} + +void IREmitter::SetFragDepth(const F32& value) { + Inst(Opcode::SetFragDepth, value); +} + U32 IREmitter::WorkgroupIdX() { return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 05263fe8b..72af5db37 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -36,6 +36,7 @@ public: void LoopMerge(Block* merge_block, Block* continue_target); void SelectionMerge(Block* merge_block); void Return(); + void DemoteToHelperInvocation(Block* continue_label); [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); @@ -67,6 +68,9 @@ public: [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); void SetAttribute(IR::Attribute attribute, const F32& value); + void SetFragColor(u32 index, u32 component, const F32& value); + void SetFragDepth(const F32& value); + [[nodiscard]] U32 WorkgroupIdX(); [[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdZ(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 5946105d2..21b7d8a9f 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -55,8 +55,11 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case Opcode::DemoteToHelperInvocation: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: + case Opcode::SetFragColor: + case Opcode::SetFragDepth: case Opcode::WriteGlobalU8: case Opcode::WriteGlobalS8: case Opcode::WriteGlobalU16: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9052a4903..593faca52 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(DemoteToHelperInvocation, Void, Label, ) // Context getters/setters OPCODE(GetRegister, U32, Reg, ) @@ -28,10 +29,12 @@ OPCODE(GetCbufS16, U32, U32, OPCODE(GetCbufU32, U32, U32, U32, ) OPCODE(GetCbufF32, F32, U32, U32, ) OPCODE(GetCbufU64, U64, U32, U32, ) -OPCODE(GetAttribute, U32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, U32, ) -OPCODE(GetAttributeIndexed, U32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, U32, ) +OPCODE(GetAttribute, F32, Attribute, ) +OPCODE(SetAttribute, Void, Attribute, F32, ) +OPCODE(GetAttributeIndexed, F32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, F32, ) +OPCODE(SetFragColor, Void, U32, U32, F32, ) +OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) OPCODE(GetSFlag, U1, Void, ) OPCODE(GetCFlag, U1, Void, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index bce8b19b3..733513c8b 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -10,6 +10,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/shader_info.h" +#include "shader_recompiler/stage.h" namespace Shader::IR { @@ -17,6 +18,7 @@ struct Program { BlockList blocks; BlockList post_order_blocks; Info info; + Stage stage{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h index 8fea05f7b..3845ec5fb 100644 --- a/src/shader_recompiler/frontend/ir/reg.h +++ b/src/shader_recompiler/frontend/ir/reg.h @@ -293,12 +293,12 @@ constexpr size_t NUM_REGS = 256; return reg + (-num); } -[[nodiscard]] constexpr Reg operator++(Reg& reg) { +constexpr Reg operator++(Reg& reg) { reg = reg + 1; return reg; } -[[nodiscard]] constexpr Reg operator++(Reg& reg, int) { +constexpr Reg operator++(Reg& reg, int) { const Reg copy{reg}; reg = reg + 1; return copy; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 715c0e92d..4f6707fae 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -104,6 +104,7 @@ bool HasFlowTest(Opcode opcode) { case Opcode::EXIT: case Opcode::JMP: case Opcode::JMX: + case Opcode::KIL: case Opcode::BRK: case Opcode::CONT: case Opcode::LONGJMP: @@ -287,6 +288,13 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati block->end = pc; return AnalysisState::Branch; } + case Opcode::KIL: { + const Predicate pred{inst.Pred()}; + const auto ir_pred{static_cast(pred.index)}; + const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated}; + AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond); + return AnalysisState::Branch; + } case Opcode::PBK: case Opcode::PCNT: case Opcode::PEXIT: @@ -324,13 +332,12 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } const IR::Condition cond{static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond, true); + AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); return AnalysisState::Branch; } void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, - EndClass insn_end_class, IR::Condition cond, - bool visit_conditional_inst) { + EndClass insn_end_class, IR::Condition cond) { if (block->begin != pc) { // If the block doesn't start in the conditional instruction // mark it as a label to visit it later @@ -356,14 +363,16 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, // Impersonate the visited block with a virtual block *block = std::move(virtual_block); // Set the end properties of the conditional instruction - conditional_block->end = visit_conditional_inst ? (pc + 1) : pc; + conditional_block->end = pc + 1; conditional_block->end_class = insn_end_class; // Add a label to the instruction after the conditional instruction Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; // Branch to the next instruction from the virtual block block->branch_false = endif_block; - // And branch to it from the conditional instruction if it is a branch - if (insn_end_class == EndClass::Branch) { + // And branch to it from the conditional instruction if it is a branch or a kill instruction + // Kill instructions are considered a branch because they demote to a helper invocation and + // execution may continue. + if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) { conditional_block->cond = IR::Condition{true}; conditional_block->branch_true = endif_block; conditional_block->branch_false = nullptr; @@ -415,7 +424,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati throw NotImplementedException("Conditional EXIT with PEXIT token"); } const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; - AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond, false); + AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); return AnalysisState::Branch; } if (const std::optional exit_pc{block->stack.Peek(Token::PEXIT)}) { @@ -425,7 +434,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati block->branch_false = nullptr; return AnalysisState::Branch; } - block->end = pc; + block->end = pc + 1; block->end_class = EndClass::Exit; return AnalysisState::Branch; } @@ -505,6 +514,12 @@ std::string CFG::Dot() const { node_uid); ++node_uid; break; + case EndClass::Kill: + dot += fmt::format("\t\t{}->N{};\n", name, node_uid); + dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", + node_uid); + ++node_uid; + break; } } if (function.entrypoint == 8) { diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index fe74f210f..22f134194 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -29,6 +29,7 @@ enum class EndClass { Call, Exit, Return, + Kill, }; enum class Token { @@ -130,7 +131,7 @@ private: AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, - IR::Condition cond, bool visit_conditional_inst); + IR::Condition cond); /// Return true when the branch instruction is confirmed to be a branch bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8bfa64326..0074eb89b 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -32,6 +32,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPoolid, DumpExpr(stmt->op)); break; @@ -424,6 +430,9 @@ private: gotos.push_back(root.insert(ip, *goto_stmt)); break; } + case Flow::EndClass::Kill: + root.insert(ip, *pool.Create(Kill{})); + break; } } } @@ -729,6 +738,15 @@ private: current_block = nullptr; break; } + case StatementType::Kill: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.DemoteToHelperInvocation(continue_block); + current_block = nullptr; + break; + } default: throw NotImplementedException("Statement type {}", stmt.type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp deleted file mode 100644 index e98bbd0d1..000000000 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/exit.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/common_types.h" -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" - -namespace Shader::Maxwell { - -void TranslatorVisitor::EXIT(u64) { - ir.Exit(); -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 000000000..ea9b33da9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp @@ -0,0 +1,43 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void ExitFragment(TranslatorVisitor& v) { + const ProgramHeader sph{v.env.SPH()}; + IR::Reg src_reg{IR::Reg::R0}; + for (u32 render_target = 0; render_target < 8; ++render_target) { + const std::array mask{sph.ps.EnabledOutputComponents(render_target)}; + for (u32 component = 0; component < 4; ++component) { + if (!mask[component]) { + continue; + } + v.ir.SetFragColor(render_target, component, v.F(src_reg)); + ++src_reg; + } + } + if (sph.ps.omap.sample_mask != 0) { + throw NotImplementedException("Sample mask"); + } + if (sph.ps.omap.depth != 0) { + throw NotImplementedException("Fragment depth"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::EXIT() { + switch (env.ShaderStage()) { + case Stage::Fragment: + ExitFragment(*this); + break; + default: + break; + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index e3e298c3b..ed81d9c36 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -108,7 +108,7 @@ public: void DSETP_reg(u64 insn); void DSETP_cbuf(u64 insn); void DSETP_imm(u64 insn); - void EXIT(u64 insn); + void EXIT(); void F2F_reg(u64 insn); void F2F_cbuf(u64 insn); void F2F_imm(u64 insn); @@ -220,7 +220,7 @@ public: void JCAL(u64 insn); void JMP(u64 insn); void JMX(u64 insn); - void KIL(u64 insn); + void KIL(); void LD(u64 insn); void LDC(u64 insn); void LDG(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index ad97786d4..2922145ee 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -11,6 +11,13 @@ namespace Shader::Maxwell { namespace { +enum class Size : u64 { + B32, + B64, + B96, + B128, +}; + enum class InterpolationMode : u64 { Pass, Multiply, @@ -23,8 +30,85 @@ enum class SampleMode : u64 { Centroid, Offset, }; + +int NumElements(Size size) { + switch (size) { + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B96: + return 3; + case Size::B128: + return 4; + } + throw InvalidArgument("Invalid size {}", size); +} } // Anonymous namespace +void TranslatorVisitor::ALD(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<39, 8, IR::Reg> stream_reg; + BitField<32, 1, u64> o; + BitField<31, 1, u64> patch; + BitField<47, 2, Size> size; + } const ald{insn}; + + if (ald.o != 0) { + throw NotImplementedException("O"); + } + if (ald.patch != 0) { + throw NotImplementedException("P"); + } + if (ald.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed"); + } + const u64 offset{ald.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ald.size)}; + for (int element = 0; element < num_elements; ++element) { + F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); + } +} + +void TranslatorVisitor::AST(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg; + BitField<8, 8, IR::Reg> index_reg; + BitField<20, 10, u64> absolute_offset; + BitField<20, 11, s64> relative_offset; + BitField<31, 1, u64> patch; + BitField<39, 8, IR::Reg> stream_reg; + BitField<47, 2, Size> size; + } const ast{insn}; + + if (ast.patch != 0) { + throw NotImplementedException("P"); + } + if (ast.stream_reg != IR::Reg::RZ) { + throw NotImplementedException("Stream store"); + } + if (ast.index_reg != IR::Reg::RZ) { + throw NotImplementedException("Indexed store"); + } + const u64 offset{ast.absolute_offset.Value()}; + if (offset % 4 != 0) { + throw NotImplementedException("Unaligned absolute offset {}", offset); + } + const int num_elements{NumElements(ast.size)}; + for (int element = 0; element < num_elements; ++element) { + ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); + } +} + void TranslatorVisitor::IPA(u64 insn) { // IPA is the instruction used to read varyings from a fragment shader. // gl_FragCoord is mapped to the gl_Position attribute. @@ -51,7 +135,7 @@ void TranslatorVisitor::IPA(u64 insn) { // } const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; if (is_indexed) { - throw NotImplementedException("IPA.IDX"); + throw NotImplementedException("IDX"); } const IR::Attribute attribute{ipa.attribute}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9675cef54..59252bcc5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -17,14 +17,6 @@ void TranslatorVisitor::AL2P(u64) { ThrowNotImplemented(Opcode::AL2P); } -void TranslatorVisitor::ALD(u64) { - ThrowNotImplemented(Opcode::ALD); -} - -void TranslatorVisitor::AST(u64) { - ThrowNotImplemented(Opcode::AST); -} - void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } @@ -153,10 +145,6 @@ void TranslatorVisitor::DSETP_imm(u64) { ThrowNotImplemented(Opcode::DSETP_imm); } -void TranslatorVisitor::EXIT(u64) { - throw LogicError("Visting EXIT instruction"); -} - void TranslatorVisitor::F2F_reg(u64) { ThrowNotImplemented(Opcode::F2F_reg); } @@ -345,8 +333,8 @@ void TranslatorVisitor::JMX(u64) { ThrowNotImplemented(Opcode::JMX); } -void TranslatorVisitor::KIL(u64) { - ThrowNotImplemented(Opcode::KIL); +void TranslatorVisitor::KIL() { + // KIL is a no-op } void TranslatorVisitor::LD(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 98d9f4c64..0fbb87ec4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -215,7 +215,7 @@ void TranslatorVisitor::TEX(u64 insn) { BitField<36, 13, u64> cbuf_offset; } const tex{insn}; - Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset)); + Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast(tex.cbuf_offset * 4)); } void TranslatorVisitor::TEX_b(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index ac1615b00..54f0df754 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -70,7 +70,7 @@ IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { IR::Value Sample(TranslatorVisitor& v, u64 insn) { const Encoding texs{insn}; - const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset))}; + const IR::U32 handle{v.ir.Imm32(static_cast(texs.cbuf_offset * 4))}; const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::Reg reg_a{texs.src_reg_a}; const IR::Reg reg_b{texs.src_reg_b}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 708b6b267..fbbe28632 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -17,10 +17,47 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { return; } info.constant_buffer_mask |= 1U << index; - info.constant_buffer_descriptors.push_back({ - .index{index}, - .count{1}, - }); + + auto& cbufs{info.constant_buffer_descriptors}; + cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), + ConstantBufferDescriptor{ + .index{index}, + .count{1}, + }); +} + +void GetAttribute(Info& info, IR::Attribute attribute) { + if (IR::IsGeneric(attribute)) { + info.loads_generics.at(IR::GenericAttributeIndex(attribute)) = true; + return; + } + switch (attribute) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + info.loads_position = true; + break; + default: + throw NotImplementedException("Get attribute {}", attribute); + } +} + +void SetAttribute(Info& info, IR::Attribute attribute) { + if (IR::IsGeneric(attribute)) { + info.stores_generics.at(IR::GenericAttributeIndex(attribute)) = true; + return; + } + switch (attribute) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + info.stores_position = true; + break; + default: + throw NotImplementedException("Set attribute {}", attribute); + } } void VisitUsages(Info& info, IR::Inst& inst) { @@ -162,6 +199,21 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; } switch (inst.Opcode()) { + case IR::Opcode::DemoteToHelperInvocation: + info.uses_demote_to_helper_invocation = true; + break; + case IR::Opcode::GetAttribute: + GetAttribute(info, inst.Arg(0).Attribute()); + break; + case IR::Opcode::SetAttribute: + SetAttribute(info, inst.Arg(0).Attribute()); + break; + case IR::Opcode::SetFragColor: + info.stores_frag_color[inst.Arg(0).U32()] = true; + break; + case IR::Opcode::SetFragDepth: + info.stores_frag_depth = true; + break; case IR::Opcode::WorkgroupId: info.uses_workgroup_id = true; break; diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index d09bcec36..bab7ca186 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -169,7 +169,7 @@ private: const size_t num_args{phi.NumArgs()}; for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { const IR::Value& op{phi.Arg(arg_index)}; - if (op == same || op == IR::Value{&phi}) { + if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { // Unique value or self-reference continue; } diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h new file mode 100644 index 000000000..1544bfa42 --- /dev/null +++ b/src/shader_recompiler/program_header.h @@ -0,0 +1,143 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Shader { + +enum class OutputTopology : u32 { + PointList = 1, + LineStrip = 6, + TriangleStrip = 7, +}; + +enum class PixelImap : u8 { + Unused = 0, + Constant = 1, + Perspective = 2, + ScreenLinear = 3, +}; + +// Documentation in: +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html +struct ProgramHeader { + union { + BitField<0, 5, u32> sph_type; + BitField<5, 5, u32> version; + BitField<10, 4, u32> shader_type; + BitField<14, 1, u32> mrt_enable; + BitField<15, 1, u32> kills_pixels; + BitField<16, 1, u32> does_global_store; + BitField<17, 4, u32> sass_version; + BitField<21, 5, u32> reserved; + BitField<26, 1, u32> does_load_or_store; + BitField<27, 1, u32> does_fp64; + BitField<28, 4, u32> stream_out_mask; + } common0; + + union { + BitField<0, 24, u32> shader_local_memory_low_size; + BitField<24, 8, u32> per_patch_attribute_count; + } common1; + + union { + BitField<0, 24, u32> shader_local_memory_high_size; + BitField<24, 8, u32> threads_per_input_primitive; + } common2; + + union { + BitField<0, 24, u32> shader_local_memory_crs_size; + BitField<24, 4, OutputTopology> output_topology; + BitField<28, 4, u32> reserved; + } common3; + + union { + BitField<0, 12, u32> max_output_vertices; + BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. + BitField<20, 4, u32> reserved; + BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. + } common4; + + union { + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB + INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + union { + BitField<0, 8, u16> clip_distances; + BitField<8, 1, u16> point_sprite_s; + BitField<9, 1, u16> point_sprite_t; + BitField<10, 1, u16> fog_coordinate; + BitField<12, 1, u16> tessellation_eval_point_u; + BitField<13, 1, u16> tessellation_eval_point_v; + BitField<14, 1, u16> instance_id; + BitField<15, 1, u16> vertex_id; + }; + INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved + INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB + INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] + INSERT_PADDING_BYTES_NOINIT(2); // OmapColor + INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC + INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved + } vtg; + + struct { + INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA + INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB + + union { + BitField<0, 2, PixelImap> x; + BitField<2, 2, PixelImap> y; + BitField<4, 2, PixelImap> z; + BitField<6, 2, PixelImap> w; + u8 raw; + } imap_generic_vector[32]; + + INSERT_PADDING_BYTES_NOINIT(2); // ImapColor + INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC + INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] + INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved + + struct { + u32 target; + union { + BitField<0, 1, u32> sample_mask; + BitField<1, 1, u32> depth; + BitField<2, 30, u32> reserved; + }; + } omap; + + [[nodiscard]] std::array EnabledOutputComponents(u32 rt) const noexcept { + const u32 bits{omap.target >> (rt * 4)}; + return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0}; + } + + [[nodiscard]] std::array GenericInputMap(u32 attribute) const { + const auto& vector{imap_generic_vector[attribute]}; + return {vector.x, vector.y, vector.z, vector.w}; + } + } ps; + + std::array raw; + }; + + [[nodiscard]] u64 LocalMemorySize() const noexcept { + return (common1.shader_local_memory_low_size | + (common2.shader_local_memory_high_size << 24)); + } +}; +static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size"); + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp deleted file mode 100644 index 527e19c27..000000000 --- a/src/shader_recompiler/recompiler.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/common_types.h" -#include "shader_recompiler/backend/spirv/emit_spirv.h" -#include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/object_pool.h" -#include "shader_recompiler/recompiler.h" - -namespace Shader { - -std::pair> RecompileSPIRV(const Profile& profile, Environment& env, - u32 start_address) { - ObjectPool flow_block_pool; - ObjectPool inst_pool; - ObjectPool block_pool; - - Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; - IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; - return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)}; -} - -} // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h deleted file mode 100644 index 2529463ae..000000000 --- a/src/shader_recompiler/recompiler.h +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/common_types.h" -#include "shader_recompiler/environment.h" -#include "shader_recompiler/profile.h" -#include "shader_recompiler/shader_info.h" - -namespace Shader { - -[[nodiscard]] std::pair> RecompileSPIRV(const Profile& profile, - Environment& env, u32 start_address); - -} // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index adc1d9a64..6eff762e2 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -56,6 +56,15 @@ struct Info { bool uses_workgroup_id{}; bool uses_local_invocation_id{}; + + std::array loads_generics{}; + bool loads_position{}; + + std::array stores_frag_color{}; + bool stores_frag_depth{}; + std::array stores_generics{}; + bool stores_position{}; + bool uses_fp16{}; bool uses_fp64{}; bool uses_fp16_denorms_flush{}; @@ -68,6 +77,7 @@ struct Info { bool uses_image_1d{}; bool uses_sampled_1d{}; bool uses_sparse_residency{}; + bool uses_demote_to_helper_invocation{}; IR::Type used_constant_buffer_types{}; diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h new file mode 100644 index 000000000..fc6ce6043 --- /dev/null +++ b/src/shader_recompiler/stage.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +enum class Stage { + Compute, + VertexA, + VertexB, + TessellationControl, + TessellationEval, + Geometry, + Fragment, +}; + +} // namespace Shader diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3323e6916..71b07c194 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -100,6 +100,7 @@ add_library(video_core STATIC renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/pipeline_helper.h renderer_vulkan/renderer_vulkan.h renderer_vulkan/renderer_vulkan.cpp renderer_vulkan/vk_blit_screen.cpp @@ -116,15 +117,18 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h + renderer_vulkan/vk_graphics_pipeline.cpp + renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h - renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_query_cache.cpp renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_render_pass_cache.cpp + renderer_vulkan/vk_render_pass_cache.h renderer_vulkan/vk_resource_pool.cpp renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_scheduler.cpp diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 362278f01..d8f683907 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -72,6 +72,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); + depth_format.Assign(static_cast(regs.zeta.format)); + std::ranges::transform(regs.rt, color_formats.begin(), + [](const auto& rt) { return static_cast(rt.format); }); alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index a0eb83a68..348f1d6ce 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -60,7 +60,7 @@ struct FixedPipelineState { void Refresh(const Maxwell& regs, size_t index); - constexpr std::array Mask() const noexcept { + std::array Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; } @@ -97,11 +97,11 @@ struct FixedPipelineState { BitField<20, 3, u32> type; BitField<23, 6, u32> size; - constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast(type.Value()); } - constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + Maxwell::VertexAttribute::Size Size() const noexcept { return static_cast(size.Value()); } }; @@ -187,7 +187,10 @@ struct FixedPipelineState { u32 raw2; BitField<0, 3, u32> alpha_test_func; BitField<3, 1, u32> early_z; + BitField<4, 1, u32> depth_enabled; + BitField<5, 5, u32> depth_format; }; + std::array color_formats; u32 alpha_test_ref; u32 point_size; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index f088447e9..dc4ff0da2 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -741,4 +741,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; } +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) { + switch (msaa_mode) { + case Tegra::Texture::MsaaMode::Msaa1x1: + return VK_SAMPLE_COUNT_1_BIT; + case Tegra::Texture::MsaaMode::Msaa2x1: + case Tegra::Texture::MsaaMode::Msaa2x1_D3D: + return VK_SAMPLE_COUNT_2_BIT; + case Tegra::Texture::MsaaMode::Msaa2x2: + case Tegra::Texture::MsaaMode::Msaa2x2_VC4: + case Tegra::Texture::MsaaMode::Msaa2x2_VC12: + return VK_SAMPLE_COUNT_4_BIT; + case Tegra::Texture::MsaaMode::Msaa4x2: + case Tegra::Texture::MsaaMode::Msaa4x2_D3D: + case Tegra::Texture::MsaaMode::Msaa4x2_VC8: + case Tegra::Texture::MsaaMode::Msaa4x2_VC24: + return VK_SAMPLE_COUNT_8_BIT; + case Tegra::Texture::MsaaMode::Msaa4x4: + return VK_SAMPLE_COUNT_16_BIT; + default: + UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast(msaa_mode)); + return VK_SAMPLE_COUNT_1_BIT; + } +} + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index e3e06ba38..9f78e15b6 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -71,4 +71,6 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); +VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode); + } // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h new file mode 100644 index 000000000..0a59aa659 --- /dev/null +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -0,0 +1,162 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/types.h" +#include "video_core/textures/texture.h" + +namespace Vulkan { + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + [[likely]] if (via_header_index) { + image = data; + sampler = data; + } else { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + } + + u32 image; + u32 sampler; +}; + +struct DescriptorLayoutTuple { + vk::DescriptorSetLayout descriptor_set_layout; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; +}; + +class DescriptorLayoutBuilder { +public: + DescriptorLayoutTuple Create(const vk::Device& device) { + DescriptorLayoutTuple result; + if (!bindings.empty()) { + result.descriptor_set_layout = device.CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); + } + result.pipeline_layout = device.CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = result.descriptor_set_layout ? 1U : 0U, + .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); + if (!entries.empty()) { + result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *result.descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *result.pipeline_layout, + .set = 0, + }); + } + return result; + } + + void Add(const Shader::Info& info, VkShaderStageFlags stage) { + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage); + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); + } + for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); + } + } + +private: + void Add(VkDescriptorType type, VkShaderStageFlags stage) { + bindings.push_back({ + .binding = binding, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = stage, + .pImmutableSamplers = nullptr, + }); + entries.push_back(VkDescriptorUpdateTemplateEntryKHR{ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + + boost::container::small_vector bindings; + boost::container::small_vector entries; + u32 binding{}; + size_t offset{}; +}; + +inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Shadow1D: + return VideoCommon::ImageViewType::e1D; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ShadowArray1D: + return VideoCommon::ImageViewType::e1DArray; + case Shader::TextureType::Color2D: + case Shader::TextureType::Shadow2D: + return VideoCommon::ImageViewType::e2D; + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::ShadowArray2D: + return VideoCommon::ImageViewType::e2DArray; + case Shader::TextureType::Color3D: + case Shader::TextureType::Shadow3D: + return VideoCommon::ImageViewType::e3D; + case Shader::TextureType::ColorCube: + case Shader::TextureType::ShadowCube: + return VideoCommon::ImageViewType::Cube; + case Shader::TextureType::ColorArrayCube: + case Shader::TextureType::ShadowArrayCube: + return VideoCommon::ImageViewType::CubeArray; + } + UNREACHABLE_MSG("Invalid texture type {}", type); + return {}; +} + +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers, + const ImageId* image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) { + for (const auto& desc : info.texture_descriptors) { + const VkSampler sampler{samplers[index]}; + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + ++index; + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ef8bef6ff..6684d37a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -6,6 +6,7 @@ #include +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" @@ -17,140 +18,10 @@ namespace Vulkan { namespace { -vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { - boost::container::small_vector bindings; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - for (const auto& desc : info.texture_descriptors) { - bindings.push_back({ - .binding = binding, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }); - ++binding; - } - return device.GetLogical().CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); -} - -vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( - const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, - VkPipelineLayout pipeline_layout) { - boost::container::small_vector entries; - size_t offset{}; - u32 binding{}; - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - for (const auto& desc : info.texture_descriptors) { - entries.push_back({ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); - } - return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(entries.size()), - .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, - .descriptorSetLayout = descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, - .pipelineLayout = pipeline_layout, - .set = 0, - }); -} - -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -VideoCommon::ImageViewType CastType(Shader::TextureType type) { - switch (type) { - case Shader::TextureType::Color1D: - case Shader::TextureType::Shadow1D: - return VideoCommon::ImageViewType::e1D; - case Shader::TextureType::ColorArray1D: - case Shader::TextureType::ShadowArray1D: - return VideoCommon::ImageViewType::e1DArray; - case Shader::TextureType::Color2D: - case Shader::TextureType::Shadow2D: - return VideoCommon::ImageViewType::e2D; - case Shader::TextureType::ColorArray2D: - case Shader::TextureType::ShadowArray2D: - return VideoCommon::ImageViewType::e2DArray; - case Shader::TextureType::Color3D: - case Shader::TextureType::Shadow3D: - return VideoCommon::ImageViewType::e3D; - case Shader::TextureType::ColorCube: - case Shader::TextureType::ShadowCube: - return VideoCommon::ImageViewType::Cube; - case Shader::TextureType::ColorArrayCube: - case Shader::TextureType::ShadowArrayCube: - return VideoCommon::ImageViewType::CubeArray; - } - UNREACHABLE_MSG("Invalid texture type {}", type); +DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) { + DescriptorLayoutBuilder builder; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + return builder.Create(device.GetLogical()); } } // Anonymous namespace @@ -158,37 +29,31 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip VKUpdateDescriptorQueue& update_descriptor_queue_, const Shader::Info& info_, vk::ShaderModule spv_module_) : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, - spv_module(std::move(spv_module_)), - descriptor_set_layout(CreateDescriptorSetLayout(device, info)), - descriptor_allocator(descriptor_pool, *descriptor_set_layout), - pipeline_layout{device.GetLogical().CreatePipelineLayout({ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - })}, - descriptor_update_template{ - CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, - pipeline{device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - })} {} + spv_module(std::move(spv_module_)) { + DescriptorLayoutTuple tuple{CreateLayout(device, info)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); +} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); @@ -211,7 +76,7 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple static constexpr size_t max_elements = 64; std::array image_view_ids; boost::container::static_vector image_view_indices; - boost::container::static_vector sampler_handles; + boost::container::static_vector samplers; const auto& launch_desc{kepler_compute.launch_description}; const auto& cbufs{launch_desc.const_buffer_config}; @@ -228,20 +93,14 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); + samplers.push_back(sampler->Handle()); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); size_t index{}; - for (const auto& desc : info.texture_descriptors) { - const VkSampler vk_sampler{sampler_handles[index]}; - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; - update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler); - ++index; - } + PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, + *update_descriptor_queue, index); } VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 08d73a2a4..e82e5816b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -9,7 +9,6 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" -#include "video_core/renderer_vulkan/vk_pipeline.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -18,7 +17,7 @@ namespace Vulkan { class Device; -class ComputePipeline : public Pipeline { +class ComputePipeline { public: explicit ComputePipeline() = default; explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp new file mode 100644 index 000000000..a2ec418b1 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -0,0 +1,445 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include +#include + +#include "common/bit_field.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { +namespace { +using boost::container::small_vector; +using boost::container::static_vector; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; + +DescriptorLayoutTuple CreateLayout(const Device& device, std::span infos) { + DescriptorLayoutBuilder builder; + for (size_t index = 0; index < infos.size(); ++index) { + static constexpr std::array stages{ + VK_SHADER_STAGE_VERTEX_BIT, + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, + VK_SHADER_STAGE_GEOMETRY_BIT, + VK_SHADER_STAGE_FRAGMENT_BIT, + }; + builder.Add(infos[index], stages.at(index)); + } + return builder.Create(device.GetLogical()); +} + +template +VkStencilOpState GetStencilFaceState(const StencilFace& face) { + return { + .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), + .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), + .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), + .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), + .compareMask = 0, + .writeMask = 0, + .reference = 0, + }; +} + +bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { + static constexpr std::array unsupported_topologies{ + VK_PRIMITIVE_TOPOLOGY_POINT_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT, + }; + return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); +} + +VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { + union Swizzle { + u32 raw; + BitField<0, 3, Maxwell::ViewportSwizzle> x; + BitField<4, 3, Maxwell::ViewportSwizzle> y; + BitField<8, 3, Maxwell::ViewportSwizzle> z; + BitField<12, 3, Maxwell::ViewportSwizzle> w; + }; + const Swizzle unpacked{swizzle}; + return VkViewportSwizzleNV{ + .x = MaxwellToVK::ViewportSwizzle(unpacked.x), + .y = MaxwellToVK::ViewportSwizzle(unpacked.y), + .z = MaxwellToVK::ViewportSwizzle(unpacked.z), + .w = MaxwellToVK::ViewportSwizzle(unpacked.w), + }; +} + +PixelFormat DecodeFormat(u8 encoded_format) { + const auto format{static_cast(encoded_format)}; + if (format == Tegra::RenderTargetFormat::NONE) { + return PixelFormat::Invalid; + } + return PixelFormatFromRenderTargetFormat(format); +} + +RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { + RenderPassKey key; + std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat); + if (state.depth_enabled != 0) { + const auto depth_format{static_cast(state.depth_format.Value())}; + key.depth_format = PixelFormatFromDepthFormat(depth_format); + } else { + key.depth_format = PixelFormat::Invalid; + } + key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); + return key; +} +} // Anonymous namespace + +GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, + BufferCache& buffer_cache_, TextureCache& texture_cache_, + const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache, + const FixedPipelineState& state, + std::array stages, + const std::array& infos) + : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, + buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, + update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)}; + descriptor_set_layout = std::move(tuple.descriptor_set_layout); + pipeline_layout = std::move(tuple.pipeline_layout); + descriptor_update_template = std::move(tuple.descriptor_update_template); + descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + MakePipeline(device, state, render_pass); +} + +void GraphicsPipeline::Configure(bool is_indexed) { + static constexpr size_t max_images_elements = 64; + std::array image_view_ids; + static_vector image_view_indices; + static_vector samplers; + + texture_cache->SynchronizeGraphicsDescriptors(); + texture_cache->UpdateRenderTargets(false); + + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache->UnbindGraphicsStorageBuffers(stage); + size_t index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, + true); + ++index; + } + const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + for (const auto& desc : info.texture_descriptors) { + const u32 cbuf_index{desc.cbuf_index}; + const u32 cbuf_offset{desc.cbuf_offset}; + ASSERT(cbufs[cbuf_index].enabled); + const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; + const u32 raw_handle{gpu_memory->Read(addr)}; + + const TextureHandle handle(raw_handle, via_header_index); + image_view_indices.push_back(handle.image); + + Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; + samplers.push_back(sampler->Handle()); + } + } + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + buffer_cache->UpdateGraphicsBuffers(is_indexed); + texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); + + buffer_cache->BindHostGeometryBuffers(is_indexed); + + size_t index{}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + buffer_cache->BindHostStageBuffers(stage); + PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), + *texture_cache, *update_descriptor_queue, index); + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + + scheduler->BindGraphicsPipeline(*pipeline); + scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, + nullptr); + }); +} + +void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass) { + FixedPipelineState::DynamicState dynamic{}; + if (!device.IsExtExtendedDynamicStateSupported()) { + dynamic = state.dynamic_state; + } + static_vector vertex_bindings; + static_vector vertex_binding_divisors; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = state.binding_divisors[index] != 0; + const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ + .binding = static_cast(index), + .stride = dynamic.vertex_strides[index], + .inputRate = rate, + }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast(index), + .divisor = state.binding_divisors[index], + }); + } + } + static_vector vertex_attributes; + const auto& input_attributes = stage_infos[0].loads_generics; + for (size_t index = 0; index < state.attributes.size(); ++index) { + const auto& attribute = state.attributes[index]; + if (!attribute.enabled || !input_attributes[index]) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); + } + VkPipelineVertexInputStateCreateInfo vertex_input_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .vertexBindingDescriptionCount = static_cast(vertex_bindings.size()), + .pVertexBindingDescriptions = vertex_bindings.data(), + .vertexAttributeDescriptionCount = static_cast(vertex_attributes.size()), + .pVertexAttributeDescriptions = vertex_attributes.data(), + }; + const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .vertexBindingDivisorCount = static_cast(vertex_binding_divisors.size()), + .pVertexBindingDivisors = vertex_binding_divisors.data(), + }; + if (!vertex_binding_divisors.empty()) { + vertex_input_ci.pNext = &input_divisor_ci; + } + const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), + .primitiveRestartEnable = state.primitive_restart_enable != 0 && + SupportsPrimitiveRestart(input_assembly_topology), + }; + const VkPipelineTessellationStateCreateInfo tessellation_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + }; + VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; + std::array swizzles; + std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, + .pNext = nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewportSwizzles = swizzles.data(), + }; + if (device.IsNvViewportSwizzleSupported()) { + viewport_ci.pNext = &swizzle_ci; + } + + const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthClampEnable = + static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + .rasterizerDiscardEnable = + static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + .polygonMode = VK_POLYGON_MODE_FILL, + .cullMode = static_cast( + dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), + .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), + .depthBiasEnable = state.depth_bias_enable, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, + }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, + }; + const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthTestEnable = dynamic.depth_test_enable, + .depthWriteEnable = dynamic.depth_write_enable, + .depthCompareOp = dynamic.depth_test_enable + ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) + : VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .stencilTestEnable = dynamic.stencil_enable, + .front = GetStencilFaceState(dynamic.front), + .back = GetStencilFaceState(dynamic.back), + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, + }; + static_vector cb_attachments; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + static constexpr std::array mask_table{ + VK_COLOR_COMPONENT_R_BIT, + VK_COLOR_COMPONENT_G_BIT, + VK_COLOR_COMPONENT_B_BIT, + VK_COLOR_COMPONENT_A_BIT, + }; + const auto format{static_cast(state.color_formats[index])}; + if (format == Tegra::RenderTargetFormat::NONE) { + continue; + } + const auto& blend{state.attachments[index]}; + const std::array mask{blend.Mask()}; + VkColorComponentFlags write_mask{}; + for (size_t i = 0; i < mask_table.size(); ++i) { + write_mask |= mask[i] ? mask_table[i] : 0; + } + cb_attachments.push_back({ + .blendEnable = blend.enable != 0, + .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), + .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), + .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), + .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), + .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), + .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), + .colorWriteMask = write_mask, + }); + } + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .logicOpEnable = VK_FALSE, + .logicOp = VK_LOGIC_OP_COPY, + .attachmentCount = static_cast(cb_attachments.size()), + .pAttachments = cb_attachments.data(), + .blendConstants = {}, + }; + static_vector dynamic_states{ + VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + if (device.IsExtExtendedDynamicStateSupported()) { + static constexpr std::array extended{ + VK_DYNAMIC_STATE_CULL_MODE_EXT, + VK_DYNAMIC_STATE_FRONT_FACE_EXT, + VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, + VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, + VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, + VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, + VK_DYNAMIC_STATE_STENCIL_OP_EXT, + }; + dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); + } + const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .dynamicStateCount = static_cast(dynamic_states.size()), + .pDynamicStates = dynamic_states.data(), + }; + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + static_vector shader_stages; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + if (!spv_modules[stage]) { + continue; + } + [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); + /* + if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { + stage_ci.pNext = &subgroup_size_ci; + } + */ + } + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(shader_stages.size()), + .pStages = shader_stages.data(), + .pVertexInputState = &vertex_input_ci, + .pInputAssemblyState = &input_assembly_ci, + .pTessellationState = &tessellation_ci, + .pViewportState = &viewport_ci, + .pRasterizationState = &rasterization_ci, + .pMultisampleState = &multisample_ci, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &color_blend_ci, + .pDynamicState = &dynamic_state_ci, + .layout = *pipeline_layout, + .renderPass = render_pass, + .subpass = 0, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h new file mode 100644 index 000000000..ba1d34a83 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class RenderPassCache; +class VKScheduler; +class VKUpdateDescriptorQueue; + +class GraphicsPipeline { + static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; + +public: + explicit GraphicsPipeline() = default; + explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, + TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, const FixedPipelineState& state, + std::array stages, + const std::array& infos); + + void Configure(bool is_indexed); + + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; + GraphicsPipeline(GraphicsPipeline&&) noexcept = default; + + GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; + GraphicsPipeline(const GraphicsPipeline&) = delete; + +private: + void MakePipeline(const Device& device, const FixedPipelineState& state, + VkRenderPass render_pass); + + Tegra::Engines::Maxwell3D* maxwell3d{}; + Tegra::MemoryManager* gpu_memory{}; + TextureCache* texture_cache{}; + BufferCache* buffer_cache{}; + VKScheduler* scheduler{}; + VKUpdateDescriptorQueue* update_descriptor_queue{}; + + std::array spv_modules; + std::array stage_infos; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h deleted file mode 100644 index b06288403..000000000 --- a/src/video_core/renderer_vulkan/vk_pipeline.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -class Pipeline { -public: - /// Add a reference count to the pipeline - void AddRef() noexcept { - ++ref_count; - } - - [[nodiscard]] bool RemoveRef() noexcept { - --ref_count; - return ref_count == 0; - } - - [[nodiscard]] u64 UsageTick() const noexcept { - return usage_tick; - } - -protected: - u64 usage_tick{}; - -private: - size_t ref_count{}; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 5477a2903..c9da2080d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,8 +12,11 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/environment.h" -#include "shader_recompiler/recompiler.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/program_header.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -34,18 +37,18 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -using Tegra::Engines::ShaderType; - namespace { -class Environment final : public Shader::Environment { +using Shader::Backend::SPIRV::EmitSPIRV; + +class GenericEnvironment : public Shader::Environment { public: - explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} {} - ~Environment() override = default; + ~GenericEnvironment() override = default; - [[nodiscard]] std::optional Analyze(u32 start_address) { + std::optional Analyze(u32 start_address) { const std::optional size{TryFindSize(start_address)}; if (!size) { return std::nullopt; @@ -55,52 +58,47 @@ public: return Common::CityHash128(reinterpret_cast(code.data()), code.size()); } - [[nodiscard]] size_t ShaderSize() const noexcept { + [[nodiscard]] size_t CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; + } + + [[nodiscard]] size_t ReadSize() const noexcept { return read_highest - read_lowest + INST_SIZE; } - [[nodiscard]] u128 ComputeHash() const { - const size_t size{ShaderSize()}; + [[nodiscard]] u128 CalculateHash() const { + const size_t size{ReadSize()}; auto data = std::make_unique(size); - gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); return Common::CityHash128(reinterpret_cast(data.get()), size); } - u64 ReadInstruction(u32 address) override { + u64 ReadInstruction(u32 address) final { read_lowest = std::min(read_lowest, address); read_highest = std::max(read_highest, address); if (address >= cached_lowest && address < cached_highest) { return code[address / INST_SIZE]; } - return gpu_memory.Read(program_base + address); - } - - u32 TextureBoundBuffer() override { - return kepler_compute.regs.tex_cb_index; - } - - std::array WorkgroupSize() override { - const auto& qmd{kepler_compute.launch_description}; - return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + return gpu_memory->Read(program_base + address); } -private: +protected: static constexpr size_t INST_SIZE = sizeof(u64); - static constexpr size_t BLOCK_SIZE = 0x1000; - static constexpr size_t MAXIMUM_SIZE = 0x100000; - static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + std::optional TryFindSize(GPUVAddr guest_addr) { + constexpr size_t BLOCK_SIZE = 0x1000; + constexpr size_t MAXIMUM_SIZE = 0x100000; + + constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - std::optional TryFindSize(u32 start_address) { - GPUVAddr guest_addr = program_base + start_address; size_t offset = 0; size_t size = BLOCK_SIZE; while (size <= MAXIMUM_SIZE) { code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; - gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { const u64 inst = data[i / INST_SIZE]; if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { @@ -114,17 +112,87 @@ private: return std::nullopt; } - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - GPUVAddr program_base; + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector code; - u32 read_lowest = 0; + u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; - std::vector code; u32 cached_lowest = std::numeric_limits::max(); u32 cached_highest = 0; }; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, + GPUVAddr program_base_, u32 start_offset) + : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + } + } + + ~GraphicsEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return maxwell3d->regs.tex_cb_index; + } + + std::array WorkgroupSize() override { + throw Shader::LogicError("Requesting workgroup size in a graphics stage"); + } + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { + stage = Shader::Stage::Compute; + } + + ~ComputeEnvironment() override = default; + + u32 TextureBoundBuffer() override { + return kepler_compute->regs.tex_cb_index; + } + + std::array WorkgroupSize() override { + const auto& qmd{kepler_compute->launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -136,19 +204,67 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } +size_t GraphicsPipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); + return static_cast(hash); +} + +bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, Size()) == 0; +} + PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, - VKUpdateDescriptorQueue& update_descriptor_queue_) + VKUpdateDescriptorQueue& update_descriptor_queue_, + RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, + TextureCache& texture_cache_) : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ - update_descriptor_queue_} {} + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { + const auto& float_control{device.FloatControlProperties()}; + profile = Shader::Profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .support_fp16_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, + .support_fp32_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .has_broken_spirv_clamp = true, // TODO: is_intel + }; +} PipelineCache::~PipelineCache() = default; +GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { + MICROPROFILE_SCOPE(Vulkan_PipelineCache); + + if (!RefreshStages()) { + return nullptr; + } + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateGraphicsPipeline(); + return &pipeline; +} + ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); @@ -170,45 +286,130 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return &pipeline; } pipeline = CreateComputePipeline(shader); - shader->compute_users.push_back(key); return &pipeline; } +bool PipelineCache::RefreshStages() { + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { + graphics_key.unique_hashes[index] = u128{}; + continue; + } + const auto& shader_config{maxwell3d.regs.shader_config[index]}; + const auto program{static_cast(index)}; + const GPUVAddr shader_addr{base_addr + shader_config.offset}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return false; + } + const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; + if (!shader_info) { + const u32 offset{shader_config.offset}; + shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr); + } + graphics_key.unique_hashes[index] = shader_info->unique_hash; + } + return true; +} + +const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, + u32 start_address, VAddr cpu_addr) { + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + auto info = std::make_unique(); + if (const std::optional cached_hash{env.Analyze(start_address)}) { + info->unique_hash = *cached_hash; + info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + flow_block_pool.ReleaseContents(); + Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { + flow_block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + + std::array envs; + std::array programs; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + const auto program{static_cast(index)}; + GraphicsEnvironment& env{envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + + const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader); + Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset); + programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg); + } + std::array infos{}; + std::array modules; + + u32 binding{0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + GraphicsEnvironment& env{envs[index]}; + Shader::IR::Program& program{programs[index]}; + + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + std::vector code{EmitSPIRV(profile, env, program, binding)}; + + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + std::system("spirv-cross --vulkan-semantics D:\\shader.spv"); + + modules[stage_index] = BuildShader(device, code); + } + return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, + descriptor_pool, update_descriptor_queue, render_pass_cache, + graphics_key.state, std::move(modules), infos); +} + ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; - Environment env{kepler_compute, gpu_memory, program_base}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base}; if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto& float_control{device.FloatControlProperties()}; - const Shader::Profile profile{ - .unified_descriptor_binding = true, - .support_float_controls = true, - .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == - VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, - .support_separate_rounding_mode = - float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, - .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, - .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, - .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, - .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, - .support_fp16_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, - .support_fp32_signed_zero_nan_preserve = - float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, - .has_broken_spirv_clamp = true, // TODO: is_intel - }; - const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; + flow_block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + block_pool.ReleaseContents(); + + Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start}; + Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + u32 binding{0}; + std::vector code{EmitSPIRV(profile, env, program, binding)}; /* FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); fclose(file); std::system("spirv-dis D:\\shader.spv"); */ - shader_info->unique_hash = env.ComputeHash(); - shader_info->size_bytes = env.ShaderSize(); - return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, + shader_info->unique_hash = env.CalculateHash(); + shader_info->size_bytes = env.ReadSize(); + return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } @@ -216,9 +417,6 @@ ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_ ShaderInfo shader; ComputePipeline pipeline{CreateComputePipeline(&shader)}; const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; - shader.compute_users.push_back(key); - pipeline.AddRef(); - const size_t size_bytes{shader.size_bytes}; Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); return &compute_cache.emplace(key, std::move(pipeline)).first->second; @@ -233,18 +431,4 @@ ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) }; } -void PipelineCache::OnShaderRemoval(ShaderInfo* shader) { - for (const ComputePipelineCacheKey& key : shader->compute_users) { - const auto it = compute_cache.find(key); - ASSERT(it != compute_cache.end()); - - Pipeline& pipeline = it->second; - if (pipeline.RemoveRef()) { - // Wait for the pipeline to be free of GPU usage before destroying it - scheduler.Wait(pipeline.UsageTick()); - compute_cache.erase(it); - } - } -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eb35abc27..60fb976df 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -12,11 +12,18 @@ #include #include -#include - #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/shader_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -26,13 +33,6 @@ class System; namespace Vulkan { -class Device; -class RasterizerVulkan; -class ComputePipeline; -class VKDescriptorPool; -class VKScheduler; -class VKUpdateDescriptorQueue; - using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { @@ -52,6 +52,26 @@ static_assert(std::has_unique_object_representations_v) static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); +struct GraphicsPipelineCacheKey { + std::array unique_hashes; + FixedPipelineState state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + } // namespace Vulkan namespace std { @@ -63,14 +83,28 @@ struct hash { } }; +template <> +struct hash { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; + } // namespace std namespace Vulkan { +class ComputePipeline; +class Device; +class RasterizerVulkan; +class RenderPassCache; +class VKDescriptorPool; +class VKScheduler; +class VKUpdateDescriptorQueue; + struct ShaderInfo { u128 unique_hash{}; size_t size_bytes{}; - std::vector compute_users; }; class PipelineCache final : public VideoCommon::ShaderCache { @@ -80,15 +114,23 @@ public: Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + VKUpdateDescriptorQueue& update_descriptor_queue, + RenderPassCache& render_pass_cache, BufferCache& buffer_cache, + TextureCache& texture_cache); ~PipelineCache() override; - [[nodiscard]] ComputePipeline* CurrentComputePipeline(); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); -protected: - void OnShaderRemoval(ShaderInfo* shader) override; + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: + bool RefreshStages(); + + const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, + u32 start_address, VAddr cpu_addr); + + GraphicsPipeline CreateGraphicsPipeline(); + ComputePipeline CreateComputePipeline(ShaderInfo* shader); ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); @@ -104,8 +146,20 @@ private: VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; + RenderPassCache& render_pass_cache; + BufferCache& buffer_cache; + TextureCache& texture_cache; + + GraphicsPipelineCacheKey graphics_key{}; std::unordered_map compute_cache; + std::unordered_map graphics_cache; + + Shader::ObjectPool inst_pool; + Shader::ObjectPool block_pool; + Shader::ObjectPool flow_block_pool; + + Shader::Profile profile; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c94419d29..036b531b9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -141,15 +141,18 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra blit_image(device, scheduler, state_tracker, descriptor_pool), astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, memory_allocator), - texture_cache_runtime{device, scheduler, memory_allocator, - staging_pool, blit_image, astc_decoder_pass}, + render_pass_cache(device), texture_cache_runtime{device, scheduler, + memory_allocator, staging_pool, + blit_image, astc_decoder_pass, + render_pass_cache}, texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, - descriptor_pool, update_descriptor_queue), - query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, + descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, + texture_cache), + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); @@ -158,7 +161,39 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { - UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); + MICROPROFILE_SCOPE(Vulkan_Drawing); + + SCOPE_EXIT({ gpu.TickWork(); }); + FlushWork(); + + query_cache.UpdateCounters(); + + GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; + if (!pipeline) { + return; + } + update_descriptor_queue.Acquire(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + pipeline->Configure(is_indexed); + + BeginTransformFeedback(); + + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + UpdateDynamicStates(); + + const auto& regs{maxwell3d.regs}; + const u32 num_instances{maxwell3d.mme_draw.instance_count}; + const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; + scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { + if (draw_params.is_indexed) { + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, + draw_params.base_vertex, draw_params.base_instance); + } else { + cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, + draw_params.base_vertex, draw_params.base_instance); + } + }); + EndTransformFeedback(); } void RasterizerVulkan::Clear() { @@ -487,13 +522,11 @@ void RasterizerVulkan::FlushWork() { if ((++draw_counter & 7) != 7) { return; } - if (draw_counter < DRAWS_TO_DISPATCH) { // Send recorded tasks to the worker thread scheduler.DispatchWork(); return; } - // Otherwise (every certain number of draws) flush execution. // This submits commands to the Vulkan driver. scheduler.Flush(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3fd03b915..88dbd753b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -23,6 +23,7 @@ #include "video_core/renderer_vulkan/vk_fence_manager.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" @@ -148,6 +149,7 @@ private: VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; + RenderPassCache render_pass_cache; TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp new file mode 100644 index 000000000..7e5ae43ea --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -0,0 +1,100 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include + +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { +namespace { +using VideoCore::Surface::PixelFormat; + +constexpr std::array ATTACHMENT_REFERENCES{ + VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, + VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, +}; + +VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, + VkSampleCountFlagBits samples) { + using MaxwellToVK::SurfaceFormat; + return { + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, + .samples = samples, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .finalLayout = VK_IMAGE_LAYOUT_GENERAL, + }; +} +} // Anonymous namespace + +RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} + +VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + const auto [pair, is_new] = cache.try_emplace(key); + if (!is_new) { + return *pair->second; + } + boost::container::static_vector descriptions; + u32 num_images{0}; + + for (size_t index = 0; index < key.color_formats.size(); ++index) { + const PixelFormat format{key.color_formats[index]}; + if (format == PixelFormat::Invalid) { + continue; + } + descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + ++num_images; + } + const size_t num_colors{descriptions.size()}; + const VkAttachmentReference* depth_attachment{}; + if (key.depth_format != PixelFormat::Invalid) { + depth_attachment = &ATTACHMENT_REFERENCES[num_colors]; + descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); + } + const VkSubpassDescription subpass{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast(num_colors), + .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = depth_attachment, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; + pair->second = device->GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast(descriptions.size()), + .pAttachments = descriptions.data(), + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 0, + .pDependencies = nullptr, + }); + return *pair->second; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h new file mode 100644 index 000000000..db8e83f1a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -0,0 +1,53 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "video_core/surface.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +struct RenderPassKey { + auto operator<=>(const RenderPassKey&) const noexcept = default; + + std::array color_formats; + VideoCore::Surface::PixelFormat depth_format; + VkSampleCountFlagBits samples; +}; + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { + size_t value = static_cast(key.depth_format) << 48; + value ^= static_cast(key.samples) << 52; + for (size_t i = 0; i < key.color_formats.size(); ++i) { + value ^= static_cast(key.color_formats[i]) << (i * 6); + } + return value; + } +}; +} // namespace std + +namespace Vulkan { + + class Device; + +class RenderPassCache { +public: + explicit RenderPassCache(const Device& device_); + + VkRenderPass Get(const RenderPassKey& key); + +private: + const Device* device{}; + std::unordered_map cache; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 88ccf96f5..1bbc542a1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -18,6 +18,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange; using VideoCore::Surface::IsPixelFormatASTC; namespace { - -constexpr std::array ATTACHMENT_REFERENCES{ - VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, -}; - constexpr VkBorderColor ConvertBorderColor(const std::array& color) { if (color == std::array{0, 0, 0, 0}) { return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; @@ -226,23 +214,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { } } -[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, - const ImageView* image_view) { - using MaxwellToVK::SurfaceFormat; - const PixelFormat pixel_format = image_view->format; - return VkAttachmentDescription{ - .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, - .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format, - .samples = image_view->Samples(), - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }; -} - [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { switch (swizzle) { case SwizzleSource::Zero: @@ -1164,7 +1135,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { - std::vector descriptions; std::vector attachments; RenderPassKey renderpass_key{}; s32 num_layers = 1; @@ -1175,7 +1145,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::spanRenderTarget()); renderpass_key.color_formats[index] = color_buffer->format; num_layers = std::max(num_layers, color_buffer->range.extent.layers); @@ -1185,10 +1154,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::spanRenderTarget()); renderpass_key.depth_format = depth_buffer->format; num_layers = std::max(num_layers, depth_buffer->range.extent.layers); @@ -1201,40 +1167,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span(num_colors), - .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = depth_attachment, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = static_cast(descriptions.size()), - .pAttachments = descriptions.data(), - .subpassCount = 1, - .pSubpasses = &subpass, - .dependencyCount = 0, - .pDependencies = nullptr, - }); - } - renderpass = *cache_pair->second; + renderpass = runtime.render_pass_cache.Get(renderpass_key); + render_area = VkExtent2D{ .width = key.size.width, .height = key.size.height, }; num_color_buffers = static_cast(num_colors); - framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ + framebuffer = runtime.device.GetLogical().CreateFramebuffer({ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 172bcdf98..189ee5a68 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -26,35 +26,10 @@ class Device; class Image; class ImageView; class Framebuffer; +class RenderPassCache; class StagingBufferPool; class VKScheduler; -struct RenderPassKey { - constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; - - std::array color_formats; - PixelFormat depth_format; - VkSampleCountFlagBits samples; -}; - -} // namespace Vulkan - -namespace std { -template <> -struct hash { - [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { - size_t value = static_cast(key.depth_format) << 48; - value ^= static_cast(key.samples) << 52; - for (size_t i = 0; i < key.color_formats.size(); ++i) { - value ^= static_cast(key.color_formats[i]) << (i * 6); - } - return value; - } -}; -} // namespace std - -namespace Vulkan { - struct TextureCacheRuntime { const Device& device; VKScheduler& scheduler; @@ -62,7 +37,7 @@ struct TextureCacheRuntime { StagingBufferPool& staging_buffer_pool; BlitImageHelper& blit_image_helper; ASTCDecoderPass& astc_decoder_pass; - std::unordered_map renderpass_cache{}; + RenderPassCache& render_pass_cache; void Finish(); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 4887d6fd9..f0e5b098c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -49,6 +49,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, + VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME, #ifdef _WIN32 VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, #endif @@ -312,6 +313,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, host_query_reset); + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, + .pNext = nullptr, + .shaderDemoteToHelperInvocation = true, + }; + SetNext(next, demote); + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; if (is_float16_supported) { float16_int8 = { @@ -597,8 +605,14 @@ void Device::CheckSuitability(bool requires_swapchain) const { throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); } } + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{}; + demote.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; + demote.pNext = nullptr; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + robustness2.pNext = &demote; VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -625,6 +639,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), + std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), -- cgit v1.2.3 From f91859efd259995806c2944f7941b105b58300d3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 05:04:12 -0300 Subject: shader: Implement I2F --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 2 + src/shader_recompiler/backend/spirv/emit_spirv.h | 13 ++ .../backend/spirv/emit_spirv_convert.cpp | 48 ++++++ .../backend/spirv/emit_spirv_integer.cpp | 4 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 145 +++++++++++------ src/shader_recompiler/frontend/ir/ir_emitter.h | 14 +- src/shader_recompiler/frontend/ir/opcodes.inc | 13 ++ .../frontend/maxwell/translate/impl/impl.cpp | 21 +++ .../frontend/maxwell/translate/impl/impl.h | 2 + .../impl/integer_floating_point_conversion.cpp | 173 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 -- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../ir_opt/collect_shader_info_pass.cpp | 28 ++++ .../ir_opt/lower_fp16_to_fp32.cpp | 16 ++ .../renderer_vulkan/vk_pipeline_cache.cpp | 3 +- 17 files changed, 429 insertions(+), 70 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 31c394106..d0f0ec775 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -84,6 +84,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_add_three_input.cpp frontend/maxwell/translate/impl/integer_compare.cpp frontend/maxwell/translate/impl/integer_compare_and_set.cpp + frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp frontend/maxwell/translate/impl/integer_funnel_shift.cpp frontend/maxwell/translate/impl/integer_minimum_maximum.cpp frontend/maxwell/translate/impl/integer_popcount.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 6c79b611b..6c8f16562 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -89,6 +89,8 @@ Id EmitContext::Def(const IR::Value& value) { return value.U1() ? true_value : false_value; case IR::Type::U32: return Constant(U32[1], value.U32()); + case IR::Type::U64: + return Constant(U64, value.U64()); case IR::Type::F32: return Constant(F32[1], value.F32()); case IR::Type::F64: diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index ae121f534..1fe65f8a9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -243,6 +243,7 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); Id EmitINeg64(EmitContext& ctx, Id value); Id EmitIAbs32(EmitContext& ctx, Id value); +Id EmitIAbs64(EmitContext& ctx, Id value); Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); @@ -302,16 +303,28 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value); Id EmitConvertF32F16(EmitContext& ctx, Id value); Id EmitConvertF32F64(EmitContext& ctx, Id value); Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S8(EmitContext& ctx, Id value); +Id EmitConvertF16S16(EmitContext& ctx, Id value); Id EmitConvertF16S32(EmitContext& ctx, Id value); Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U8(EmitContext& ctx, Id value); +Id EmitConvertF16U16(EmitContext& ctx, Id value); Id EmitConvertF16U32(EmitContext& ctx, Id value); Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S8(EmitContext& ctx, Id value); +Id EmitConvertF32S16(EmitContext& ctx, Id value); Id EmitConvertF32S32(EmitContext& ctx, Id value); Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U8(EmitContext& ctx, Id value); +Id EmitConvertF32U16(EmitContext& ctx, Id value); Id EmitConvertF32U32(EmitContext& ctx, Id value); Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S8(EmitContext& ctx, Id value); +Id EmitConvertF64S16(EmitContext& ctx, Id value); Id EmitConvertF64S32(EmitContext& ctx, Id value); Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U8(EmitContext& ctx, Id value); +Id EmitConvertF64U16(EmitContext& ctx, Id value); Id EmitConvertF64U32(EmitContext& ctx, Id value); Id EmitConvertF64U64(EmitContext& ctx, Id value); Id EmitBindlessImageSampleImplicitLod(EmitContext&); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index 2aff673aa..757165626 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -102,6 +102,14 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) { return ctx.OpFConvert(ctx.F64[1], value); } +Id EmitConvertF16S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + +Id EmitConvertF16S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F16[1], value); +} + Id EmitConvertF16S32(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F16[1], value); } @@ -110,6 +118,14 @@ Id EmitConvertF16S64(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F16[1], value); } +Id EmitConvertF16U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + +Id EmitConvertF16U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F16[1], value); +} + Id EmitConvertF16U32(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F16[1], value); } @@ -118,6 +134,14 @@ Id EmitConvertF16U64(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F16[1], value); } +Id EmitConvertF32S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF32S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF32S32(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F32[1], value); } @@ -126,6 +150,14 @@ Id EmitConvertF32S64(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F32[1], value); } +Id EmitConvertF32U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF32U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F32[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF32U32(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F32[1], value); } @@ -134,6 +166,14 @@ Id EmitConvertF32U64(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F32[1], value); } +Id EmitConvertF64S8(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF64S16(EmitContext& ctx, Id value) { + return ctx.OpConvertSToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF64S32(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F64[1], value); } @@ -142,6 +182,14 @@ Id EmitConvertF64S64(EmitContext& ctx, Id value) { return ctx.OpConvertSToF(ctx.F64[1], value); } +Id EmitConvertF64U8(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U8, value)); +} + +Id EmitConvertF64U16(EmitContext& ctx, Id value) { + return ctx.OpConvertUToF(ctx.F64[1], ctx.OpUConvert(ctx.U16, value)); +} + Id EmitConvertF64U32(EmitContext& ctx, Id value) { return ctx.OpConvertUToF(ctx.F64[1], value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index c9de204b0..a9c5e9cca 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -70,6 +70,10 @@ Id EmitIAbs32(EmitContext& ctx, Id value) { return ctx.OpSAbs(ctx.U32[1], value); } +Id EmitIAbs64(EmitContext& ctx, Id value) { + return ctx.OpSAbs(ctx.U64, value); +} + Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 672836c0b..652f6949e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -53,6 +53,10 @@ U64 IREmitter::Imm64(u64 value) const { return U64{Value{value}}; } +U64 IREmitter::Imm64(s64 value) const { + return U64{Value{static_cast(value)}}; +} + F64 IREmitter::Imm64(f64 value) const { return F64{Value{value}}; } @@ -363,7 +367,7 @@ U1 IREmitter::GetSparseFromOp(const Value& op) { } F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { - if (a.Type() != a.Type()) { + if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { @@ -974,8 +978,15 @@ U32U64 IREmitter::INeg(const U32U64& value) { } } -U32 IREmitter::IAbs(const U32& value) { - return Inst(Opcode::IAbs32, value); +U32U64 IREmitter::IAbs(const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::IAbs32, value); + case Type::U64: + return Inst(Opcode::IAbs64, value); + default: + ThrowInvalidType(value.Type()); + } } U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { @@ -1074,8 +1085,25 @@ U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); } -U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) { - return Inst(Opcode::IEqual, lhs, rhs); +U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(Opcode::IEqual, lhs, rhs); + case Type::U64: { + // Manually compare the unpacked values + const Value lhs_vector{UnpackUint2x32(lhs)}; + const Value rhs_vector{UnpackUint2x32(rhs)}; + return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)}, + IR::U32{CompositeExtract(rhs_vector, 0)}), + IEqual(IR::U32{CompositeExtract(lhs_vector, 1)}, + IR::U32{CompositeExtract(rhs_vector, 1)})); + } + default: + ThrowInvalidType(lhs.Type()); + } } U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { @@ -1198,79 +1226,96 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v } } -F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) { - switch (bitsize) { +F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { + switch (dest_bitsize) { case 16: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF16S8, value); + case 16: + return Inst(Opcode::ConvertF16S16, value); + case 32: return Inst(Opcode::ConvertF16S32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF16S64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 32: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF32S8, value); + case 16: + return Inst(Opcode::ConvertF32S16, value); + case 32: return Inst(Opcode::ConvertF32S32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF32S64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 64: - switch (value.Type()) { - case Type::U32: - return Inst(Opcode::ConvertF64S32, value); - case Type::U64: - return Inst(Opcode::ConvertF64S64, value); - default: - ThrowInvalidType(value.Type()); + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF64S8, value); + case 16: + return Inst(Opcode::ConvertF64S16, value); + case 32: + return Inst(Opcode::ConvertF64S32, value); + case 64: + return Inst(Opcode::ConvertF64S64, value); } - default: - throw InvalidArgument("Invalid destination bitsize {}", bitsize); + break; } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); } -F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) { - switch (bitsize) { +F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value) { + switch (dest_bitsize) { case 16: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF16U8, value); + case 16: + return Inst(Opcode::ConvertF16U16, value); + case 32: return Inst(Opcode::ConvertF16U32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF16U64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 32: - switch (value.Type()) { - case Type::U32: + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF32U8, value); + case 16: + return Inst(Opcode::ConvertF32U16, value); + case 32: return Inst(Opcode::ConvertF32U32, value); - case Type::U64: + case 64: return Inst(Opcode::ConvertF32U64, value); - default: - ThrowInvalidType(value.Type()); } + break; case 64: - switch (value.Type()) { - case Type::U32: - return Inst(Opcode::ConvertF64U32, value); - case Type::U64: - return Inst(Opcode::ConvertF64U64, value); - default: - ThrowInvalidType(value.Type()); + switch (src_bitsize) { + case 8: + return Inst(Opcode::ConvertF64U8, value); + case 16: + return Inst(Opcode::ConvertF64U16, value); + case 32: + return Inst(Opcode::ConvertF64U32, value); + case 64: + return Inst(Opcode::ConvertF64U64, value); } - default: - throw InvalidArgument("Invalid destination bitsize {}", bitsize); + break; } + throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); } -F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) { +F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, + const Value& value) { if (is_signed) { - return ConvertSToF(bitsize, value); + return ConvertSToF(dest_bitsize, src_bitsize, value); } else { - return ConvertUToF(bitsize, value); + return ConvertUToF(dest_bitsize, src_bitsize, value); } } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 72af5db37..8edb11154 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -29,6 +29,7 @@ public: [[nodiscard]] U32 Imm32(s32 value) const; [[nodiscard]] F32 Imm32(f32 value) const; [[nodiscard]] U64 Imm64(u64 value) const; + [[nodiscard]] U64 Imm64(s64 value) const; [[nodiscard]] F64 Imm64(f64 value) const; void Branch(Block* label); @@ -170,7 +171,7 @@ public: [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); [[nodiscard]] U32U64 INeg(const U32U64& value); - [[nodiscard]] U32 IAbs(const U32& value); + [[nodiscard]] U32U64 IAbs(const U32U64& value); [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); @@ -193,7 +194,7 @@ public: [[nodiscard]] U32 UMax(const U32& a, const U32& b); [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); - [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); + [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); @@ -207,9 +208,12 @@ public: [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); - [[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value); - [[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value); - [[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value); + [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, + const Value& value); + [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, + const Value& value); + [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, + const Value& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 593faca52..8471db7b9 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -247,6 +247,7 @@ OPCODE(IMul32, U32, U32, OPCODE(INeg32, U32, U32, ) OPCODE(INeg64, U64, U64, ) OPCODE(IAbs32, U32, U32, ) +OPCODE(IAbs64, U64, U64, ) OPCODE(ShiftLeftLogical32, U32, U32, U32, ) OPCODE(ShiftLeftLogical64, U64, U64, U32, ) OPCODE(ShiftRightLogical32, U32, U32, U32, ) @@ -311,16 +312,28 @@ OPCODE(ConvertF16F32, F16, F32, OPCODE(ConvertF32F16, F32, F16, ) OPCODE(ConvertF32F64, F32, F64, ) OPCODE(ConvertF64F32, F64, F32, ) +OPCODE(ConvertF16S8, F16, U32, ) +OPCODE(ConvertF16S16, F16, U32, ) OPCODE(ConvertF16S32, F16, U32, ) OPCODE(ConvertF16S64, F16, U64, ) +OPCODE(ConvertF16U8, F16, U32, ) +OPCODE(ConvertF16U16, F16, U32, ) OPCODE(ConvertF16U32, F16, U32, ) OPCODE(ConvertF16U64, F16, U64, ) +OPCODE(ConvertF32S8, F32, U32, ) +OPCODE(ConvertF32S16, F32, U32, ) OPCODE(ConvertF32S32, F32, U32, ) OPCODE(ConvertF32S64, F32, U64, ) +OPCODE(ConvertF32U8, F32, U32, ) +OPCODE(ConvertF32U16, F32, U32, ) OPCODE(ConvertF32U32, F32, U32, ) OPCODE(ConvertF32U64, F32, U64, ) +OPCODE(ConvertF64S8, F64, U32, ) +OPCODE(ConvertF64S16, F64, U32, ) OPCODE(ConvertF64S32, F64, U32, ) OPCODE(ConvertF64S64, F64, U64, ) +OPCODE(ConvertF64U8, F64, U32, ) +OPCODE(ConvertF64U16, F64, U32, ) OPCODE(ConvertF64U32, F64, U32, ) OPCODE(ConvertF64U64, F64, U64, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index e444dcd4f..c9af83010 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -121,6 +121,22 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); } +IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { + union { + u64 raw; + BitField<20, 1, u64> unaligned; + } const cbuf{insn}; + + if (cbuf.unaligned != 0) { + throw NotImplementedException("Unaligned packed constant buffer read"); + } + const auto [binding, lower_offset]{CbufAddr(insn)}; + const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)}; + const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)}; + const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)}; + return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value)); +} + IR::U32 TranslatorVisitor::GetImm20(u64 insn) { union { u64 raw; @@ -158,6 +174,11 @@ IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { return ir.Imm64(Common::BitCast(value | sign_bit)); } +IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) { + const s64 value{GetImm20(insn).U32()}; + return ir.Imm64(static_cast(static_cast(value) << 32)); +} + IR::U32 TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index ed81d9c36..cb66cca25 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -356,10 +356,12 @@ public: [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); + [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); + [[nodiscard]] IR::U64 GetPackedImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp new file mode 100644 index 000000000..e8b5ae1d2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -0,0 +1,173 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class FloatFormat : u64 { + F16 = 1, + F32 = 2, + F64 = 3, +}; + +enum class IntFormat : u64 { + U8 = 0, + U16 = 1, + U32 = 2, + U64 = 3, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 2, FloatFormat> float_format; + BitField<10, 2, IntFormat> int_format; + BitField<13, 1, u64> is_signed; + BitField<39, 2, FpRounding> fp_rounding; + BitField<41, 2, u64> selector; + BitField<47, 1, u64> cc; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; +}; + +bool Is64(u64 insn) { + return Encoding{insn}.int_format == IntFormat::U64; +} + +int BitSize(FloatFormat format) { + switch (format) { + case FloatFormat::F16: + return 16; + case FloatFormat::F32: + return 32; + case FloatFormat::F64: + return 64; + } + throw NotImplementedException("Invalid float format {}", format); +} + +IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { + const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))}; + const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))}; + const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)}; + const IR::U1 is_least{v.ir.IEqual(value, least_value)}; + return IR::U32{v.ir.Select(is_least, value, absolute)}; +} + +void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { + const Encoding i2f{insn}; + if (i2f.cc != 0) { + throw NotImplementedException("CC"); + } + const bool is_signed{i2f.is_signed != 0}; + int src_bitsize{}; + switch (i2f.int_format) { + case IntFormat::U8: + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast(i2f.selector) * 8), + v.ir.Imm32(8), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 8); + } + src_bitsize = 8; + break; + case IntFormat::U16: + if (i2f.selector == 1 || i2f.selector == 3) { + throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value()); + } + src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast(i2f.selector) * 8), + v.ir.Imm32(16), is_signed); + if (i2f.abs != 0) { + src = SmallAbs(v, src, 16); + } + src_bitsize = 16; + break; + case IntFormat::U32: + case IntFormat::U64: + if (i2f.selector != 0) { + throw NotImplementedException("Unexpected selector {}", i2f.selector.Value()); + } + if (i2f.abs != 0 && is_signed) { + src = v.ir.IAbs(src); + } + src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32; + break; + } + const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32}; + const int dst_bitsize{BitSize(i2f.float_format)}; + IR::F16F32F64 value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src)}; + if (i2f.neg != 0) { + if (i2f.abs != 0 || !is_signed) { + // We know the value is positive + value = v.ir.FPNeg(value); + } else { + // Only negate if the input isn't the lowest value + IR::U1 is_least; + if (src_bitsize == 64) { + is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits::min())); + } else { + const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; + is_least = v.ir.IEqual(src, least_value); + } + value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; + } + } + switch (i2f.float_format) { + case FloatFormat::F16: { + const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; + v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); + break; + } + case FloatFormat::F32: + v.F(i2f.dest_reg, value); + break; + case FloatFormat::F64: { + if (!IR::IsAligned(i2f.dest_reg, 2)) { + throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); + } + const IR::Value vector{v.ir.UnpackDouble2x32(value)}; + for (int i = 0; i < 2; ++i) { + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + } + break; + } + default: + throw NotImplementedException("Invalid float format {}", i2f.float_format.Value()); + } +} +} // Anonymous namespace + +void TranslatorVisitor::I2F_reg(u64 insn) { + if (Is64(insn)) { + union { + u64 raw; + BitField<20, 8, IR::Reg> reg; + } const value{insn}; + const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; + I2F(*this, insn, ir.PackUint2x32(regs)); + } else { + I2F(*this, insn, GetReg20(insn)); + } +} + +void TranslatorVisitor::I2F_cbuf(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedCbuf(insn)); + } else { + I2F(*this, insn, GetCbuf(insn)); + } +} + +void TranslatorVisitor::I2F_imm(u64 insn) { + if (Is64(insn)) { + I2F(*this, insn, GetPackedImm20(insn)); + } else { + I2F(*this, insn, GetImm20(insn)); + } +} + +} // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a4367fc5a..4078feafa 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -241,18 +241,6 @@ void TranslatorVisitor::HSETP2_imm(u64) { ThrowNotImplemented(Opcode::HSETP2_imm); } -void TranslatorVisitor::I2F_reg(u64) { - ThrowNotImplemented(Opcode::I2F_reg); -} - -void TranslatorVisitor::I2F_cbuf(u64) { - ThrowNotImplemented(Opcode::I2F_cbuf); -} - -void TranslatorVisitor::I2F_imm(u64) { - ThrowNotImplemented(Opcode::I2F_imm); -} - void TranslatorVisitor::IDE(u64) { ThrowNotImplemented(Opcode::IDE); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 0fbb87ec4..b691b4d1f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -56,7 +56,7 @@ Shader::TextureType GetType(TextureType type, bool dc) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { - const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }}; + const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; switch (type) { case TextureType::_1D: return v.F(reg); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index 54f0df754..d5fda20f4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -65,7 +65,7 @@ IR::Value Composite(TranslatorVisitor& v, Args... regs) { } IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { - return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); + return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); } IR::Value Sample(TranslatorVisitor& v, u64 insn) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index fbbe28632..e72505d61 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -79,6 +79,14 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertU16F16: case IR::Opcode::ConvertU32F16: case IR::Opcode::ConvertU64F16: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16S32: + case IR::Opcode::ConvertF16S64: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF16U32: + case IR::Opcode::ConvertF16U64: case IR::Opcode::FPAbs16: case IR::Opcode::FPAdd16: case IR::Opcode::FPCeil16: @@ -105,6 +113,14 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven64: case IR::Opcode::FPSaturate64: case IR::Opcode::FPTrunc64: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64S32: + case IR::Opcode::ConvertF64S64: + case IR::Opcode::ConvertF64U8: + case IR::Opcode::ConvertF64U16: + case IR::Opcode::ConvertF64U32: + case IR::Opcode::ConvertF64U64: info.uses_fp64 = true; break; default: @@ -123,6 +139,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::WriteStorageU8: case IR::Opcode::WriteStorageS8: case IR::Opcode::SelectU8: + case IR::Opcode::ConvertF16S8: + case IR::Opcode::ConvertF16U8: + case IR::Opcode::ConvertF32S8: + case IR::Opcode::ConvertF32U8: + case IR::Opcode::ConvertF64S8: + case IR::Opcode::ConvertF64U8: info.uses_int8 = true; break; default: @@ -149,6 +171,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertU16F16: case IR::Opcode::ConvertU16F32: case IR::Opcode::ConvertU16F64: + case IR::Opcode::ConvertF16S16: + case IR::Opcode::ConvertF16U16: + case IR::Opcode::ConvertF32S16: + case IR::Opcode::ConvertF32U16: + case IR::Opcode::ConvertF64S16: + case IR::Opcode::ConvertF64U16: info.uses_int16 = true; break; default: diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 74acb8bb6..baa3d22df 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -70,6 +70,22 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::Identity; case IR::Opcode::ConvertF16F32: return IR::Opcode::Identity; + case IR::Opcode::ConvertF16S8: + return IR::Opcode::ConvertF32S8; + case IR::Opcode::ConvertF16S16: + return IR::Opcode::ConvertF32S16; + case IR::Opcode::ConvertF16S32: + return IR::Opcode::ConvertF32S32; + case IR::Opcode::ConvertF16S64: + return IR::Opcode::ConvertF32S64; + case IR::Opcode::ConvertF16U8: + return IR::Opcode::ConvertF32U8; + case IR::Opcode::ConvertF16U16: + return IR::Opcode::ConvertF32U16; + case IR::Opcode::ConvertF16U32: + return IR::Opcode::ConvertF32U32; + case IR::Opcode::ConvertF16U64: + return IR::Opcode::ConvertF32U64; default: return op; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c9da2080d..d1399a46d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -227,6 +227,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { const auto& float_control{device.FloatControlProperties()}; + const VkDriverIdKHR driver_id{device.GetDriverID()}; profile = Shader::Profile{ .unified_descriptor_binding = true, .support_float_controls = true, @@ -242,7 +243,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, .support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, - .has_broken_spirv_clamp = true, // TODO: is_intel + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } -- cgit v1.2.3 From 76c8a962ac4eae77e71d66a72c448930240339f9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Mar 2021 19:11:56 -0300 Subject: spirv: Implement VertexId and InstanceId, refactor code --- .../backend/spirv/emit_context.cpp | 191 ++++++++++++--------- src/shader_recompiler/backend/spirv/emit_context.h | 14 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 107 +++++++----- src/shader_recompiler/backend/spirv/emit_spirv.h | 4 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 16 ++ .../backend/spirv/emit_spirv_memory.cpp | 46 +++-- .../ir_opt/collect_shader_info_pass.cpp | 6 + src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 2 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 10 files changed, 244 insertions(+), 144 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 6c8f16562..4a4de3676 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -48,6 +48,25 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { } throw InvalidArgument("Invalid texture type {}", desc.type); } + +Id DefineVariable(EmitContext& ctx, Id type, std::optional builtin, + spv::StorageClass storage_class) { + const Id pointer_type{ctx.TypePointer(storage_class, type)}; + const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)}; + if (builtin) { + ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin); + } + ctx.interfaces.push_back(id); + return id; +} + +Id DefineInput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { + return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); +} + +Id DefineOutput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { + return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -144,59 +163,8 @@ void EmitContext::DefineCommonConstants() { } void EmitContext::DefineInterfaces(const Info& info, Stage stage) { - const auto define{ - [this](Id type, std::optional builtin, spv::StorageClass storage_class) { - const Id pointer_type{TypePointer(storage_class, type)}; - const Id id{AddGlobalVariable(pointer_type, storage_class)}; - if (builtin) { - Decorate(id, spv::Decoration::BuiltIn, *builtin); - } - interfaces.push_back(id); - return id; - }}; - using namespace std::placeholders; - const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)}; - const auto define_output{std::bind(define, _1, _2, spv::StorageClass::Output)}; - - if (info.uses_workgroup_id) { - workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId); - } - if (info.uses_local_invocation_id) { - local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId); - } - if (info.loads_position) { - const bool is_fragment{stage != Stage::Fragment}; - const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; - input_position = define_input(F32[4], built_in); - } - for (size_t i = 0; i < info.loads_generics.size(); ++i) { - if (info.loads_generics[i]) { - // FIXME: Declare size from input - input_generics[i] = define_input(F32[4], std::nullopt); - Decorate(input_generics[i], spv::Decoration::Location, static_cast(i)); - Name(input_generics[i], fmt::format("in_attr{}", i)); - } - } - if (info.stores_position) { - output_position = define_output(F32[4], spv::BuiltIn::Position); - } - for (size_t i = 0; i < info.stores_generics.size(); ++i) { - if (info.stores_generics[i]) { - output_generics[i] = define_output(F32[4], std::nullopt); - Decorate(output_generics[i], spv::Decoration::Location, static_cast(i)); - Name(output_generics[i], fmt::format("out_attr{}", i)); - } - } - if (stage == Stage::Fragment) { - for (size_t i = 0; i < 8; ++i) { - if (!info.stores_frag_color[i]) { - continue; - } - frag_color[i] = define_output(F32[4], std::nullopt); - Decorate(frag_color[i], spv::Decoration::Location, static_cast(i)); - Name(frag_color[i], fmt::format("frag_color{}", i)); - } - } + DefineInputs(info, stage); + DefineOutputs(info, stage); } void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { @@ -225,33 +193,6 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { } } -void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, - u32 binding, Id type, char type_char, u32 element_size) { - const Id array_type{TypeArray(type, Constant(U32[1], 65536U / element_size))}; - Decorate(array_type, spv::Decoration::ArrayStride, element_size); - - const Id struct_type{TypeStruct(array_type)}; - Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); - Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, 0, "data"); - MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - - const Id struct_pointer_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; - const Id uniform_type{TypePointer(spv::StorageClass::Uniform, type)}; - uniform_types.*member_type = uniform_type; - - for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { - const Id id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; - Decorate(id, spv::Decoration::Binding, binding); - Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("c{}", desc.index)); - for (size_t i = 0; i < desc.count; ++i) { - cbufs[desc.index + i].*member_type = id; - } - binding += desc.count; - } -} - void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { if (info.storage_buffers_descriptors.empty()) { return; @@ -311,4 +252,94 @@ void EmitContext::DefineLabels(IR::Program& program) { } } +void EmitContext::DefineInputs(const Info& info, Stage stage) { + if (info.uses_workgroup_id) { + workgroup_id = DefineInput(*this, U32[3], spv::BuiltIn::WorkgroupId); + } + if (info.uses_local_invocation_id) { + local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); + } + if (info.loads_position) { + const bool is_fragment{stage != Stage::Fragment}; + const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; + input_position = DefineInput(*this, F32[4], built_in); + } + if (info.loads_instance_id) { + if (profile.support_vertex_instance_id) { + instance_id = DefineInput(*this, U32[1], spv::BuiltIn::InstanceId); + } else { + instance_index = DefineInput(*this, U32[1], spv::BuiltIn::InstanceIndex); + base_instance = DefineInput(*this, U32[1], spv::BuiltIn::BaseInstance); + } + } + if (info.loads_vertex_id) { + if (profile.support_vertex_instance_id) { + vertex_id = DefineInput(*this, U32[1], spv::BuiltIn::VertexId); + } else { + vertex_index = DefineInput(*this, U32[1], spv::BuiltIn::VertexIndex); + base_vertex = DefineInput(*this, U32[1], spv::BuiltIn::BaseVertex); + } + } + for (size_t index = 0; index < info.loads_generics.size(); ++index) { + if (!info.loads_generics[index]) { + continue; + } + // FIXME: Declare size from input + const Id id{DefineInput(*this, F32[4])}; + Decorate(id, spv::Decoration::Location, static_cast(index)); + Name(id, fmt::format("in_attr{}", index)); + input_generics[index] = id; + } +} + +void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, + u32 binding, Id type, char type_char, u32 element_size) { + const Id array_type{TypeArray(type, Constant(U32[1], 65536U / element_size))}; + Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{TypeStruct(array_type)}; + Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "data"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; + const Id uniform_type{TypePointer(spv::StorageClass::Uniform, type)}; + uniform_types.*member_type = uniform_type; + + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + const Id id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("c{}", desc.index)); + for (size_t i = 0; i < desc.count; ++i) { + cbufs[desc.index + i].*member_type = id; + } + binding += desc.count; + } +} + +void EmitContext::DefineOutputs(const Info& info, Stage stage) { + if (info.stores_position) { + output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); + } + for (size_t i = 0; i < info.stores_generics.size(); ++i) { + if (info.stores_generics[i]) { + output_generics[i] = DefineOutput(*this, F32[4]); + Decorate(output_generics[i], spv::Decoration::Location, static_cast(i)); + Name(output_generics[i], fmt::format("out_attr{}", i)); + } + } + if (stage == Stage::Fragment) { + for (size_t i = 0; i < 8; ++i) { + if (!info.stores_frag_color[i]) { + continue; + } + frag_color[i] = DefineOutput(*this, F32[4]); + Decorate(frag_color[i], spv::Decoration::Location, static_cast(i)); + Name(frag_color[i], fmt::format("frag_color{}", i)); + } + } +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 2d7961ac3..9b9e0d6b1 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -82,6 +82,12 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id instance_id{}; + Id instance_index{}; + Id base_instance{}; + Id vertex_id{}; + Id vertex_index{}; + Id base_vertex{}; Id input_position{}; std::array input_generics{}; @@ -99,11 +105,15 @@ private: void DefineCommonConstants(); void DefineInterfaces(const Info& info, Stage stage); void DefineConstantBuffers(const Info& info, u32& binding); - void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, - Id type, char type_char, u32 element_size); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); void DefineLabels(IR::Program& program); + + void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, + Id type, char type_char, u32 element_size); + + void DefineInputs(const Info& info, Stage stage); + void DefineOutputs(const Info& info, Stage stage); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index b8978b94a..efd0b70b7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -113,6 +113,43 @@ Id TypeId(const EmitContext& ctx, IR::Type type) { } } +Id DefineMain(EmitContext& ctx, IR::Program& program) { + const Id void_function{ctx.TypeFunction(ctx.void_id)}; + const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; + for (IR::Block* const block : program.blocks) { + ctx.AddLabel(block->Definition()); + for (IR::Inst& inst : block->Instructions()) { + EmitInst(ctx, &inst); + } + } + ctx.OpFunctionEnd(); + return main; +} + +void DefineEntryPoint(Environment& env, EmitContext& ctx, Id main) { + const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); + spv::ExecutionModel execution_model{}; + switch (env.ShaderStage()) { + case Shader::Stage::Compute: { + const std::array workgroup_size{env.WorkgroupSize()}; + execution_model = spv::ExecutionModel::GLCompute; + ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], + workgroup_size[1], workgroup_size[2]); + break; + } + case Shader::Stage::VertexB: + execution_model = spv::ExecutionModel::Vertex; + break; + case Shader::Stage::Fragment: + execution_model = spv::ExecutionModel::Fragment; + ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); + break; + default: + throw NotImplementedException("Stage {}", env.ShaderStage()); + } + ctx.AddEntryPoint(execution_model, main, "main", interfaces); +} + void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) { if (!profile.support_float_controls) { @@ -173,6 +210,25 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } } +void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { + if (info.uses_sampled_1d) { + ctx.AddCapability(spv::Capability::Sampled1D); + } + if (info.uses_sparse_residency) { + ctx.AddCapability(spv::Capability::SparseResidency); + } + if (info.uses_demote_to_helper_invocation) { + ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); + ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); + } + if (!profile.support_vertex_instance_id && (info.loads_instance_id || info.loads_vertex_id)) { + ctx.AddExtension("SPV_KHR_shader_draw_parameters"); + ctx.AddCapability(spv::Capability::DrawParameters); + } + // TODO: Track this usage + ctx.AddCapability(spv::Capability::ImageGatherExtended); +} + Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { // Phi nodes can have forward declarations, if an argument is not defined provide a forward // declaration of it. Invoke will take care of giving it the right definition when it's @@ -202,53 +258,10 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program, u32& binding) { EmitContext ctx{profile, program, binding}; - const Id void_function{ctx.TypeFunction(ctx.void_id)}; - const Id func{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; - for (IR::Block* const block : program.blocks) { - ctx.AddLabel(block->Definition()); - for (IR::Inst& inst : block->Instructions()) { - EmitInst(ctx, &inst); - } - } - ctx.OpFunctionEnd(); - - const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); - spv::ExecutionModel execution_model{}; - switch (env.ShaderStage()) { - case Shader::Stage::Compute: { - const std::array workgroup_size{env.WorkgroupSize()}; - execution_model = spv::ExecutionModel::GLCompute; - ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], - workgroup_size[1], workgroup_size[2]); - break; - } - case Shader::Stage::VertexB: - execution_model = spv::ExecutionModel::Vertex; - break; - case Shader::Stage::Fragment: - execution_model = spv::ExecutionModel::Fragment; - ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); - break; - default: - throw NotImplementedException("Stage {}", env.ShaderStage()); - } - ctx.AddEntryPoint(execution_model, func, "main", interfaces); - - SetupDenormControl(profile, program, ctx, func); - const Info& info{program.info}; - if (info.uses_sampled_1d) { - ctx.AddCapability(spv::Capability::Sampled1D); - } - if (info.uses_sparse_residency) { - ctx.AddCapability(spv::Capability::SparseResidency); - } - if (info.uses_demote_to_helper_invocation) { - ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); - ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); - } - // TODO: Track this usage - ctx.AddCapability(spv::Capability::ImageGatherExtended); - + const Id main{DefineMain(ctx, program)}; + DefineEntryPoint(env, ctx, main); + SetupDenormControl(profile, program, ctx, main); + SetupCapabilities(profile, program.info, ctx); return ctx.Assemble(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 1fe65f8a9..e297a0e20 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -81,8 +81,8 @@ void EmitLoadStorageS8(EmitContext& ctx); void EmitLoadStorageU16(EmitContext& ctx); void EmitLoadStorageS16(EmitContext& ctx); Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorage64(EmitContext& ctx); -void EmitLoadStorage128(EmitContext& ctx); +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); void EmitWriteStorageU8(EmitContext& ctx); void EmitWriteStorageS8(EmitContext& ctx); void EmitWriteStorageU16(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 02d115740..052b84151 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -19,6 +19,10 @@ Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionZ: case IR::Attribute::PositionW: return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id()); + case IR::Attribute::InstanceId: + return ctx.OpLoad(ctx.U32[1], ctx.instance_id); + case IR::Attribute::VertexId: + return ctx.OpLoad(ctx.U32[1], ctx.vertex_id); default: throw NotImplementedException("Read attribute {}", attr); } @@ -125,6 +129,18 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o } Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { + if (!ctx.profile.support_vertex_instance_id) { + switch (attr) { + case IR::Attribute::InstanceId: + return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_index), + ctx.OpLoad(ctx.U32[1], ctx.base_instance)); + case IR::Attribute::VertexId: + return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_index), + ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); + default: + break; + } + } return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr)); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 7d3efc741..088bd3059 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -7,8 +7,8 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { - -static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { +namespace { +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { if (offset.IsImmediate()) { const u32 imm_offset{static_cast(offset.U32() / element_size)}; return ctx.Constant(ctx.U32[1], imm_offset); @@ -22,6 +22,32 @@ static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } +Id EmitLoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + u32 num_components) { + // TODO: Support reinterpreting bindings, guaranteed to be aligned + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + std::array components; + for (u32 element = 0; element < num_components; ++element) { + Id index{base_index}; + if (element > 0) { + index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); + } + const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; + components[element] = ctx.OpLoad(ctx.U32[1], pointer); + } + if (num_components == 1) { + return components[0]; + } else { + const std::span components_span(components.data(), num_components); + return ctx.OpCompositeConstruct(ctx.U32[num_components], components_span); + } +} +} // Anonymous namespace + void EmitLoadGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } @@ -95,21 +121,15 @@ void EmitLoadStorageS16(EmitContext&) { } Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Dynamic storage buffer indexing"); - } - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id index{StorageIndex(ctx, offset, sizeof(u32))}; - const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; - return ctx.OpLoad(ctx.U32[1], pointer); + return EmitLoadStorage(ctx, binding, offset, 1); } -void EmitLoadStorage64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return EmitLoadStorage(ctx, binding, offset, 2); } -void EmitLoadStorage128(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return EmitLoadStorage(ctx, binding, offset, 4); } void EmitWriteStorageU8(EmitContext&) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e72505d61..e7fa3fce0 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -38,6 +38,12 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PositionW: info.loads_position = true; break; + case IR::Attribute::InstanceId: + info.loads_instance_id = true; + break; + case IR::Attribute::VertexId: + info.loads_vertex_id = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c6a143598..770299524 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -8,6 +8,7 @@ namespace Shader { struct Profile { bool unified_descriptor_binding{}; + bool support_vertex_instance_id{}; bool support_float_controls{}; bool support_separate_denorm_behavior{}; bool support_separate_rounding_mode{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 6eff762e2..f97730b34 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -59,6 +59,8 @@ struct Info { std::array loads_generics{}; bool loads_position{}; + bool loads_instance_id{}; + bool loads_vertex_id{}; std::array stores_frag_color{}; bool stores_frag_depth{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d1399a46d..90e1a30f6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -230,6 +230,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, const VkDriverIdKHR driver_id{device.GetDriverID()}; profile = Shader::Profile{ .unified_descriptor_binding = true, + .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, -- cgit v1.2.3 From e4e1cc11b8f7649171fe922b2899e57120bfba53 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 21 Mar 2021 19:28:37 -0400 Subject: shader: Implement DMNMX, DSET, DSETP --- src/shader_recompiler/CMakeLists.txt | 3 ++ src/shader_recompiler/backend/spirv/emit_spirv.cpp | 39 +++++++------- src/shader_recompiler/backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_select.cpp | 4 ++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../translate/impl/double_compare_and_set.cpp | 59 ++++++++++++++++++++++ .../maxwell/translate/impl/double_min_max.cpp | 50 ++++++++++++++++++ .../translate/impl/double_set_predicate.cpp | 54 ++++++++++++++++++++ .../translate/impl/floating_point_min_max.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 36 ------------- .../ir_opt/collect_shader_info_pass.cpp | 1 + .../ir_opt/constant_propagation_pass.cpp | 10 +++- src/shader_recompiler/profile.h | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 2 + 16 files changed, 210 insertions(+), 59 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 63ba1c75f..23cb523a8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -65,8 +65,11 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/common_funcs.h frontend/maxwell/translate/impl/condition_code_set.cpp frontend/maxwell/translate/impl/double_add.cpp + frontend/maxwell/translate/impl/double_compare_and_set.cpp frontend/maxwell/translate/impl/double_fused_multiply_add.cpp + frontend/maxwell/translate/impl/double_min_max.cpp frontend/maxwell/translate/impl/double_multiply.cpp + frontend/maxwell/translate/impl/double_set_predicate.cpp frontend/maxwell/translate/impl/exit_program.cpp frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index efd0b70b7..93e851133 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -152,24 +152,7 @@ void DefineEntryPoint(Environment& env, EmitContext& ctx, Id main) { void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) { - if (!profile.support_float_controls) { - return; - } const Info& info{program.info}; - if (!info.uses_fp32_denorms_flush && !info.uses_fp32_denorms_preserve && - !info.uses_fp16_denorms_flush && !info.uses_fp16_denorms_preserve) { - return; - } - ctx.AddExtension("SPV_KHR_float_controls"); - - if (info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { - ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); - ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); - } - if (profile.support_fp32_signed_zero_nan_preserve) { - ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); - ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); - } if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); } else if (info.uses_fp32_denorms_flush) { @@ -210,6 +193,22 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } } +void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, + EmitContext& ctx, Id main_func) { + if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); + } + if (profile.support_fp32_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); + } + if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) { + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 64U); + } +} + void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { if (info.uses_sampled_1d) { ctx.AddCapability(spv::Capability::Sampled1D); @@ -260,7 +259,11 @@ std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program EmitContext ctx{profile, program, binding}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(env, ctx, main); - SetupDenormControl(profile, program, ctx, main); + if (profile.support_float_controls) { + ctx.AddExtension("SPV_KHR_float_controls"); + SetupDenormControl(profile, program, ctx, main); + SetupSignedNanCapabilities(profile, program, ctx, main); + } SetupCapabilities(profile, program.info, ctx); return ctx.Assemble(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 486ef10a7..960d022ff 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -136,6 +136,7 @@ Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); void EmitBitCastU16F16(EmitContext& ctx); Id EmitBitCastU32F32(EmitContext& ctx, Id value); void EmitBitCastU64F64(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 0ae127d50..8b0562da5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -35,4 +35,8 @@ Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value); } +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 00c909f3e..432dd29a5 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -529,6 +529,8 @@ Value IREmitter::Select(const U1& condition, const Value& true_value, const Valu return Inst(Opcode::SelectU64, condition, true_value, false_value); case Type::F32: return Inst(Opcode::SelectF32, condition, true_value, false_value); + case Type::F64: + return Inst(Opcode::SelectF64, condition, true_value, false_value); default: throw InvalidArgument("Invalid type {}", true_value.Type()); } diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 884eea7a8..bdc07b9a7 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -131,6 +131,7 @@ OPCODE(SelectU32, U32, U1, OPCODE(SelectU64, U64, U1, U64, U64, ) OPCODE(SelectF16, F16, U1, F16, F16, ) OPCODE(SelectF32, F32, U1, F32, F32, ) +OPCODE(SelectF64, F64, U1, F64, F64, ) // Bitwise conversions OPCODE(BitCastU16F16, U16, F16, ) diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index 1dfaeb92f..c6cd2a79b 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -37,8 +37,8 @@ INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----") INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----") INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") -INST(DMNMX_reg, "DMNMX (reg)", "0100 1100 0101 0---") -INST(DMNMX_cbuf, "DMNMX (cbuf)", "0101 1100 0101 0---") +INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---") +INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---") INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---") INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---") INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp new file mode 100644 index 000000000..e2ec852c9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp @@ -0,0 +1,59 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + } const dset{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)}; + + IR::U1 pred{v.ir.GetPred(dset.pred)}; + if (dset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one}; + + v.X(dset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); +} +} // Anonymous namespace + +void TranslatorVisitor::DSET_reg(u64 insn) { + DSET(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSET_cbuf(u64 insn) { + DSET(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSET_imm(u64 insn) { + DSET(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp new file mode 100644 index 000000000..55a224db3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp @@ -0,0 +1,50 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const dmnmx{insn}; + + const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; + + IR::F64 max{v.ir.FPMax(op_a, op_b)}; + IR::F64 min{v.ir.FPMin(op_a, op_b)}; + + if (dmnmx.neg_pred != 0) { + std::swap(min, max); + } + v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::DMNMX_reg(u64 insn) { + DMNMX(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DMNMX_cbuf(u64 insn) { + DMNMX(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DMNMX_imm(u64 insn) { + DMNMX(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp new file mode 100644 index 000000000..b8e74ee44 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp @@ -0,0 +1,54 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + } const dsetp{insn}; + + const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)}; + const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)}; + + const BooleanOp bop{dsetp.bop}; + const FPCompareOp compare_op{dsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)}; + const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(dsetp.dest_pred_a, result_a); + v.ir.SetPred(dsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::DSETP_reg(u64 insn) { + DSETP(*this, insn, GetDoubleReg20(insn)); +} + +void TranslatorVisitor::DSETP_cbuf(u64 insn) { + DSETP(*this, insn, GetDoubleCbuf(insn)); +} + +void TranslatorVisitor::DSETP_imm(u64 insn) { + DSETP(*this, insn, GetDoubleImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index c3180a9bd..343d91032 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -24,7 +24,7 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; - const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0); + const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; const IR::FpControl control{ .no_contraction{false}, diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 08f6eb788..27b12ff3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -81,42 +81,6 @@ void TranslatorVisitor::DEPBAR() { // DEPBAR is a no-op } -void TranslatorVisitor::DMNMX_reg(u64) { - ThrowNotImplemented(Opcode::DMNMX_reg); -} - -void TranslatorVisitor::DMNMX_cbuf(u64) { - ThrowNotImplemented(Opcode::DMNMX_cbuf); -} - -void TranslatorVisitor::DMNMX_imm(u64) { - ThrowNotImplemented(Opcode::DMNMX_imm); -} - -void TranslatorVisitor::DSET_reg(u64) { - ThrowNotImplemented(Opcode::DSET_reg); -} - -void TranslatorVisitor::DSET_cbuf(u64) { - ThrowNotImplemented(Opcode::DSET_cbuf); -} - -void TranslatorVisitor::DSET_imm(u64) { - ThrowNotImplemented(Opcode::DSET_imm); -} - -void TranslatorVisitor::DSETP_reg(u64) { - ThrowNotImplemented(Opcode::DSETP_reg); -} - -void TranslatorVisitor::DSETP_cbuf(u64) { - ThrowNotImplemented(Opcode::DSETP_cbuf); -} - -void TranslatorVisitor::DSETP_imm(u64) { - ThrowNotImplemented(Opcode::DSETP_imm); -} - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e9f64cf3f..f44eac5d8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -130,6 +130,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::CompositeInsertF64x2: case IR::Opcode::CompositeInsertF64x3: case IR::Opcode::CompositeInsertF64x4: + case IR::Opcode::SelectF64: case IR::Opcode::BitCastU64F64: case IR::Opcode::BitCastF64U64: case IR::Opcode::PackDouble2x32: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index a39db2bf1..ef7766d22 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -229,7 +229,6 @@ void FoldISub32(IR::Inst& inst) { } } -template void FoldSelect(IR::Inst& inst) { const IR::Value cond{inst.Arg(0)}; if (cond.IsImmediate()) { @@ -340,8 +339,15 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(block, inst); + case IR::Opcode::SelectU1: + case IR::Opcode::SelectU8: + case IR::Opcode::SelectU16: case IR::Opcode::SelectU32: - return FoldSelect(inst); + case IR::Opcode::SelectU64: + case IR::Opcode::SelectF16: + case IR::Opcode::SelectF32: + case IR::Opcode::SelectF64: + return FoldSelect(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); case IR::Opcode::LogicalOr: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 770299524..3181c79fb 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -18,6 +18,7 @@ struct Profile { bool support_fp32_denorm_flush{}; bool support_fp16_signed_zero_nan_preserve{}; bool support_fp32_signed_zero_nan_preserve{}; + bool support_fp64_signed_zero_nan_preserve{}; // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 90e1a30f6..75f7c1e61 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -244,6 +244,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, .support_fp32_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, + .support_fp64_signed_zero_nan_preserve = + float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } -- cgit v1.2.3 From c63cf4fa2e22538a01c191e1f97ac0f93b67e804 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Mar 2021 21:03:20 -0300 Subject: vk_pipeline_cache: Add pipeline cache --- src/shader_recompiler/environment.h | 11 +- src/shader_recompiler/file_environment.cpp | 4 +- src/shader_recompiler/file_environment.h | 4 +- src/shader_recompiler/stage.h | 4 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 391 ++++++++++++++++----- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 34 +- .../renderer_vulkan/vk_render_pass_cache.cpp | 1 + .../renderer_vulkan/vk_render_pass_cache.h | 4 +- 8 files changed, 347 insertions(+), 106 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 1fcaa56dd..6dec4b255 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -3,8 +3,8 @@ #include #include "common/common_types.h" -#include "shader_recompiler/stage.h" #include "shader_recompiler/program_header.h" +#include "shader_recompiler/stage.h" namespace Shader { @@ -14,9 +14,9 @@ public: [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; - [[nodiscard]] virtual u32 TextureBoundBuffer() = 0; + [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; - [[nodiscard]] virtual std::array WorkgroupSize() = 0; + [[nodiscard]] virtual std::array WorkgroupSize() const = 0; [[nodiscard]] const ProgramHeader& SPH() const noexcept { return sph; @@ -26,9 +26,14 @@ public: return stage; } + [[nodiscard]] u32 StartAddress() const noexcept { + return start_address; + } + protected: ProgramHeader sph{}; Stage stage{}; + u32 start_address{}; }; } // namespace Shader diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index 21700c72b..f2104f444 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp @@ -39,11 +39,11 @@ u64 FileEnvironment::ReadInstruction(u32 offset) { return data[offset / 8]; } -u32 FileEnvironment::TextureBoundBuffer() { +u32 FileEnvironment::TextureBoundBuffer() const { throw NotImplementedException("Texture bound buffer serialization"); } -std::array FileEnvironment::WorkgroupSize() { +std::array FileEnvironment::WorkgroupSize() const { return {1, 1, 1}; } diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index 62302bc8e..17640a622 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -14,9 +14,9 @@ public: u64 ReadInstruction(u32 offset) override; - u32 TextureBoundBuffer() override; + u32 TextureBoundBuffer() const override; - std::array WorkgroupSize() override; + std::array WorkgroupSize() const override; private: std::vector data; diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h index fc6ce6043..7d4f2c0bb 100644 --- a/src/shader_recompiler/stage.h +++ b/src/shader_recompiler/stage.h @@ -4,9 +4,11 @@ #pragma once +#include "common/common_types.h" + namespace Shader { -enum class Stage { +enum class Stage : u32 { Compute, VertexA, VertexB, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 75f7c1e61..41fc9588f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -4,12 +4,15 @@ #include #include +#include #include #include #include "common/bit_cast.h" #include "common/cityhash.h" +#include "common/file_util.h" #include "common/microprofile.h" +#include "common/thread_worker.h" #include "core/core.h" #include "core/memory.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -37,18 +40,23 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -namespace { -using Shader::Backend::SPIRV::EmitSPIRV; +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; - explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : gpu_memory{&gpu_memory_}, program_base{program_base_} {} + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; + } ~GenericEnvironment() override = default; - std::optional Analyze(u32 start_address) { + std::optional Analyze() { const std::optional size{TryFindSize(start_address)}; if (!size) { return std::nullopt; @@ -66,11 +74,15 @@ public: return read_highest - read_lowest + INST_SIZE; } + [[nodiscard]] bool CanBeSerialized() const noexcept { + return has_unbound_instructions; + } + [[nodiscard]] u128 CalculateHash() const { const size_t size{ReadSize()}; - auto data = std::make_unique(size); + const auto data{std::make_unique(size)}; gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); - return Common::CityHash128(reinterpret_cast(data.get()), size); + return Common::CityHash128(data.get(), size); } u64 ReadInstruction(u32 address) final { @@ -80,9 +92,32 @@ public: if (address >= cached_lowest && address < cached_highest) { return code[address / INST_SIZE]; } + has_unbound_instructions = true; return gpu_memory->Read(program_base + address); } + void Serialize(std::ofstream& file) const { + const u64 code_size{static_cast(ReadSize())}; + const auto data{std::make_unique(code_size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); + + const u32 texture_bound{TextureBoundBuffer()}; + + file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast(&start_address), sizeof(start_address)) + .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .write(reinterpret_cast(&read_highest), sizeof(read_highest)) + .write(reinterpret_cast(&stage), sizeof(stage)) + .write(data.get(), code_size); + if (stage == Shader::Stage::Compute) { + const std::array workgroup_size{WorkgroupSize()}; + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + } else { + file.write(reinterpret_cast(&sph), sizeof(sph)); + } + } + protected: static constexpr size_t INST_SIZE = sizeof(u64); @@ -122,16 +157,22 @@ protected: u32 cached_lowest = std::numeric_limits::max(); u32 cached_highest = 0; + + bool has_unbound_instructions = false; }; +namespace { +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::TranslateProgram; + class GraphicsEnvironment final : public GenericEnvironment { public: explicit GraphicsEnvironment() = default; explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, - GPUVAddr program_base_, u32 start_offset) - : GenericEnvironment{gpu_memory_, program_base_}, maxwell3d{&maxwell3d_} { - gpu_memory->ReadBlock(program_base + start_offset, &sph, sizeof(sph)); + GPUVAddr program_base_, u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -158,11 +199,11 @@ public: ~GraphicsEnvironment() override = default; - u32 TextureBoundBuffer() override { + u32 TextureBoundBuffer() const override { return maxwell3d->regs.tex_cb_index; } - std::array WorkgroupSize() override { + std::array WorkgroupSize() const override { throw Shader::LogicError("Requesting workgroup size in a graphics stage"); } @@ -174,18 +215,20 @@ class ComputeEnvironment final : public GenericEnvironment { public: explicit ComputeEnvironment() = default; explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) - : GenericEnvironment{gpu_memory_, program_base_}, kepler_compute{&kepler_compute_} { + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { stage = Shader::Stage::Compute; } ~ComputeEnvironment() override = default; - u32 TextureBoundBuffer() override { + u32 TextureBoundBuffer() const override { return kepler_compute->regs.tex_cb_index; } - std::array WorkgroupSize() override { + std::array WorkgroupSize() const override { const auto& qmd{kepler_compute->launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; } @@ -193,8 +236,174 @@ public: private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; + +void SerializePipeline(std::span key, std::span envs, + std::ofstream& file) { + if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast(envs.size())}; + file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); +} + +template +void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { + try { + std::ofstream file; + file.exceptions(std::ifstream::failbit); + Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::app); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); + return; + } + if (file.tellp() == 0) { + // Write header... + } + const std::span key_span(reinterpret_cast(&key), sizeof(key)); + SerializePipeline(key_span, MakeSpan(envs), file); + + } catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::Delete(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); + } + } +} + +class FileEnvironment final : public Shader::Environment { +public: + void Deserialize(std::ifstream& file) { + u64 code_size{}; + file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast(&start_address), sizeof(start_address)) + .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast(&stage), sizeof(stage)); + code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast(code.get()), code_size); + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + } else { + file.read(reinterpret_cast(&sph), sizeof(sph)); + } + } + + u64 ReadInstruction(u32 address) override { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; + } + + u32 TextureBoundBuffer() const override { + return texture_bound; + } + + std::array WorkgroupSize() const override { + return workgroup_size; + } + +private: + std::unique_ptr code; + std::array workgroup_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; } // Anonymous namespace +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; + } + std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; + std::string base_dir{shader_dir + "/vulkan"}; + std::string transferable_dir{base_dir + "/transferable"}; + std::string precompiled_dir{base_dir + "/precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); + + Common::ThreadWorker worker(11, "PipelineBuilder"); + std::mutex cache_mutex; + struct { + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + std::ifstream file; + Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + // Read header... + + while (file.tellg() != end) { + if (stop_loading) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); + auto envs{std::make_shared>(num_envs)}; + for (FileEnvironment& env : *envs) { + env.Deserialize(file); + } + if (envs->front().ShaderStage() == Shader::Stage::Compute) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + ShaderPools pools; + ComputePipeline pipeline{CreateComputePipeline(pools, key, envs->front())}; + + std::lock_guard lock{cache_mutex}; + compute_cache.emplace(key, std::move(pipeline)); + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + } + }); + } else { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + ShaderPools pools; + boost::container::static_vector env_ptrs; + for (auto& env : *envs) { + env_ptrs.push_back(&env); + } + GraphicsPipeline pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs))}; + + std::lock_guard lock{cache_mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + } + }); + } + ++state.total; + } + { + std::lock_guard lock{cache_mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + } + worker.WaitForRequests(); +} + size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); return static_cast(hash); @@ -279,17 +488,22 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { if (!cpu_shader_addr) { return nullptr; } - ShaderInfo* const shader{TryGet(*cpu_shader_addr)}; + const ShaderInfo* shader{TryGet(*cpu_shader_addr)}; if (!shader) { - return CreateComputePipelineWithoutShader(*cpu_shader_addr); + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + shader = MakeShaderInfo(env, *cpu_shader_addr); } - const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)}; + const ComputePipelineCacheKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; auto& pipeline{pair->second}; if (!is_new) { return &pipeline; } - pipeline = CreateComputePipeline(shader); + pipeline = CreateComputePipeline(key, shader); return &pipeline; } @@ -310,26 +524,25 @@ bool PipelineCache::RefreshStages() { } const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; if (!shader_info) { - const u32 offset{shader_config.offset}; - shader_info = MakeShaderInfo(program, base_addr, offset, *cpu_shader_addr); + const u32 start_address{shader_config.offset}; + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + shader_info = MakeShaderInfo(env, *cpu_shader_addr); } graphics_key.unique_hashes[index] = shader_info->unique_hash; } return true; } -const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, - u32 start_address, VAddr cpu_addr) { - GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; +const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { auto info = std::make_unique(); - if (const std::optional cached_hash{env.Analyze(start_address)}) { + if (const std::optional cached_hash{env.Analyze()}) { info->unique_hash = *cached_hash; info->size_bytes = env.CachedSize(); } else { // Slow path, not really hit on commercial games // Build a control flow graph to get the real shader size - flow_block_pool.ReleaseContents(); - Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + main_pools.flow_block.ReleaseContents(); + Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; info->unique_hash = env.CalculateHash(); info->size_bytes = env.ReadSize(); } @@ -339,100 +552,100 @@ const ShaderInfo* PipelineCache::MakeShaderInfo(Maxwell::ShaderProgram program, return result; } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { - flow_block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - block_pool.ReleaseContents(); - - std::array envs; +GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, + const GraphicsPipelineCacheKey& key, + std::span envs) { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); + size_t env_index{0}; std::array programs; - - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == u128{}) { continue; } - const auto program{static_cast(index)}; - GraphicsEnvironment& env{envs[index]}; - const u32 start_address{maxwell3d.regs.shader_config[index].offset}; - env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + Shader::Environment& env{*envs[env_index]}; + ++env_index; - const u32 cfg_offset = start_address + sizeof(Shader::ProgramHeader); - Shader::Maxwell::Flow::CFG cfg(env, flow_block_pool, cfg_offset); - programs[index] = Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg); + const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); } std::array infos{}; std::array modules; u32 binding{0}; + env_index = 0; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == u128{}) { continue; } UNIMPLEMENTED_IF(index == 0); - GraphicsEnvironment& env{envs[index]}; Shader::IR::Program& program{programs[index]}; - const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - std::vector code{EmitSPIRV(profile, env, program, binding)}; - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); - std::system("spirv-cross --vulkan-semantics D:\\shader.spv"); + Shader::Environment& env{*envs[env_index]}; + ++env_index; + const std::vector code{EmitSPIRV(profile, env, program, binding)}; modules[stage_index] = BuildShader(device, code); } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, - descriptor_pool, update_descriptor_queue, render_pass_cache, - graphics_key.state, std::move(modules), infos); + descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, + std::move(modules), infos); } -ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { +GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { + main_pools.ReleaseContents(); + + std::array graphics_envs; + boost::container::static_vector generic_envs; + boost::container::static_vector envs; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] == u128{}) { + continue; + } + const auto program{static_cast(index)}; + GraphicsEnvironment& env{graphics_envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + generic_envs.push_back(&env); + envs.push_back(&env); + } + GraphicsPipeline pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs))}; + if (!pipeline_cache_filename.empty()) { + SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); + } + return pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; - ComputeEnvironment env{kepler_compute, gpu_memory, program_base}; - if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { - // TODO: Load from cache + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + main_pools.ReleaseContents(); + ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; + if (!pipeline_cache_filename.empty()) { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); } - flow_block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - block_pool.ReleaseContents(); + return pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env) const { + LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); - Shader::Maxwell::Flow::CFG cfg{env, flow_block_pool, qmd.program_start}; - Shader::IR::Program program{Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; std::vector code{EmitSPIRV(profile, env, program, binding)}; - /* - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); - std::system("spirv-dis D:\\shader.spv"); - */ - shader_info->unique_hash = env.CalculateHash(); - shader_info->size_bytes = env.ReadSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } -ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) { - ShaderInfo shader; - ComputePipeline pipeline{CreateComputePipeline(&shader)}; - const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; - const size_t size_bytes{shader.size_bytes}; - Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); - return &compute_cache.emplace(key, std::move(pipeline)).first->second; -} - -ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const { - const auto& qmd{kepler_compute.launch_description}; - return { - .unique_hash = unique_hash, - .shared_memory_size = qmd.shared_alloc, - .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, - }; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 60fb976df..2ecb68bdc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -96,6 +97,7 @@ namespace Vulkan { class ComputePipeline; class Device; +class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; class VKDescriptorPool; @@ -107,6 +109,18 @@ struct ShaderInfo { size_t size_bytes{}; }; +struct ShaderPools { + void ReleaseContents() { + inst.ReleaseContents(); + block.ReleaseContents(); + flow_block.ReleaseContents(); + } + + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; +}; + class PipelineCache final : public VideoCommon::ShaderCache { public: explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, @@ -123,19 +137,24 @@ public: [[nodiscard]] ComputePipeline* CurrentComputePipeline(); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); + private: bool RefreshStages(); - const ShaderInfo* MakeShaderInfo(Maxwell::ShaderProgram program, GPUVAddr base_addr, - u32 start_address, VAddr cpu_addr); + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); GraphicsPipeline CreateGraphicsPipeline(); - ComputePipeline CreateComputePipeline(ShaderInfo* shader); + GraphicsPipeline CreateGraphicsPipeline(ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs); - ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); + ComputePipeline CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); - ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const; + ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, + Shader::Environment& env) const; Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; @@ -155,11 +174,10 @@ private: std::unordered_map compute_cache; std::unordered_map graphics_cache; - Shader::ObjectPool inst_pool; - Shader::ObjectPool block_pool; - Shader::ObjectPool flow_block_pool; + ShaderPools main_pools; Shader::Profile profile; + std::string pipeline_cache_filename; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 7e5ae43ea..1c6ba7289 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -50,6 +50,7 @@ VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { + std::lock_guard lock{mutex}; const auto [pair, is_new] = cache.try_emplace(key); if (!is_new) { return *pair->second; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h index db8e83f1a..eaa0ed775 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.h +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include "video_core/surface.h" @@ -37,7 +38,7 @@ struct hash { namespace Vulkan { - class Device; +class Device; class RenderPassCache { public: @@ -48,6 +49,7 @@ public: private: const Device* device{}; std::unordered_map cache; + std::mutex mutex; }; } // namespace Vulkan -- cgit v1.2.3 From f8115a6a9e544c3cc33f32ea821d0df15e01591c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Mar 2021 21:03:20 -0300 Subject: vk_pipeline_cache: Add pipeline cache --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 +++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 036b531b9..8f63a7591 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -514,6 +514,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } +void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + pipeline_cache.LoadDiskResources(title_id, stop_loading, callback); +} + void RasterizerVulkan::FlushWork() { static constexpr u32 DRAWS_TO_DISPATCH = 4096; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 88dbd753b..2f1551e65 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -101,6 +101,8 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) override; private: static constexpr size_t MAX_TEXTURES = 192; -- cgit v1.2.3 From d40faa1db0f1703edc4e8f279f1556cee4ebddad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 23 Mar 2021 16:12:04 -0300 Subject: vk_pipeline_cache: Fix ReleaseContents order --- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 2ecb68bdc..d481f56f9 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -111,9 +111,9 @@ struct ShaderInfo { struct ShaderPools { void ReleaseContents() { - inst.ReleaseContents(); - block.ReleaseContents(); flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } Shader::ObjectPool inst; -- cgit v1.2.3 From 3d07cef009cf9e287744c7771c67166ef5761ce8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 23 Mar 2021 20:27:17 -0400 Subject: shader: Implement VOTE --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 4 ++ src/shader_recompiler/backend/spirv/emit_context.h | 3 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 9 ++++ src/shader_recompiler/backend/spirv/emit_spirv.h | 4 ++ .../backend/spirv/emit_spirv_vote.cpp | 58 ++++++++++++++++++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 16 ++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 5 ++ src/shader_recompiler/frontend/ir/opcodes.inc | 6 +++ .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../frontend/maxwell/translate/impl/vote.cpp | 52 +++++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 6 +++ src/shader_recompiler/profile.h | 2 + src/shader_recompiler/shader_info.h | 1 + .../renderer_vulkan/vk_compute_pipeline.cpp | 7 ++- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 + src/video_core/vulkan_common/vulkan_device.cpp | 1 + src/video_core/vulkan_common/vulkan_device.h | 6 +++ 18 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 23cb523a8..086bdf8d0 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -15,6 +15,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_memory.cpp backend/spirv/emit_spirv_select.cpp backend/spirv/emit_spirv_undefined.cpp + backend/spirv/emit_spirv_vote.cpp environment.h exception.h file_environment.cpp @@ -122,6 +123,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/select_source_with_predicate.cpp frontend/maxwell/translate/impl/texture_fetch.cpp frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp + frontend/maxwell/translate/impl/vote.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4a4de3676..36f130781 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -259,6 +259,10 @@ void EmitContext::DefineInputs(const Info& info, Stage stage) { if (info.uses_local_invocation_id) { local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); } + if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) { + subgroup_local_invocation_id = + DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); + } if (info.loads_position) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 9b9e0d6b1..6e64360bf 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -82,6 +82,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id subgroup_local_invocation_id{}; Id instance_id{}; Id instance_index{}; Id base_instance{}; @@ -96,7 +97,7 @@ public: std::array output_generics{}; std::array frag_color{}; - Id frag_depth {}; + Id frag_depth{}; std::vector interfaces; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 93e851133..107403912 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -224,6 +224,15 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } + if (info.uses_subgroup_vote && profile.support_vote) { + ctx.AddExtension("SPV_KHR_shader_ballot"); + ctx.AddCapability(spv::Capability::SubgroupBallotKHR); + if (!profile.warp_size_potentially_larger_than_guest) { + // vote ops are only used when not taking the long path + ctx.AddExtension("SPV_KHR_subgroup_vote"); + ctx.AddCapability(spv::Capability::SubgroupVoteKHR); + } + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 960d022ff..ce23200f2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -346,5 +346,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va Id coords, Id dref, Id bias_lc, Id offset); Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id dref, Id lod_lc, Id offset); +Id EmitVoteAll(EmitContext& ctx, Id pred); +Id EmitVoteAny(EmitContext& ctx, Id pred); +Id EmitVoteEqual(EmitContext& ctx, Id pred); +Id EmitSubgroupBallot(EmitContext& ctx, Id pred); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp new file mode 100644 index 000000000..a63677ef2 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp @@ -0,0 +1,58 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id LargeWarpBallot(EmitContext& ctx, Id ballot) { + const Id shift{ctx.Constant(ctx.U32[1], 5)}; + const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; + return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); +} +} // Anonymous namespace + +Id EmitVoteAll(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpIEqual(ctx.U1, lhs, active_mask); +} + +Id EmitVoteAny(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAnyKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; + return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); +} + +Id EmitVoteEqual(EmitContext& ctx, Id pred) { + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); + } + const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; + const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; + const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; + const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), + ctx.OpIEqual(ctx.U1, lhs, active_mask)); +} + +Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { + const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); + } + return LargeWarpBallot(ctx, ballot); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 432dd29a5..ff2970125 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1444,4 +1444,20 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coor return Inst(op, Flags{info}, handle, coords, dref, lod_lc, offset); } +U1 IREmitter::VoteAll(const U1& value) { + return Inst(Opcode::VoteAll, value); +} + +U1 IREmitter::VoteAny(const U1& value) { + return Inst(Opcode::VoteAny, value); +} + +U1 IREmitter::VoteEqual(const U1& value) { + return Inst(Opcode::VoteEqual, value); +} + +U32 IREmitter::SubgroupBallot(const U1& value) { + return Inst(Opcode::SubgroupBallot, value); +} + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 346cef3ab..1708be3ef 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -234,6 +234,11 @@ public: const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] U1 VoteAll(const U1& value); + [[nodiscard]] U1 VoteAny(const U1& value); + [[nodiscard]] U1 VoteEqual(const U1& value); + [[nodiscard]] U32 SubgroupBallot(const U1& value); + private: IR::Block::iterator insertion_point; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index bdc07b9a7..fe888b8b2 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -355,3 +355,9 @@ OPCODE(ImageSampleImplicitLod, F32x4, U32, OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) + +// Vote operations +OPCODE(VoteAll, U1, U1, ) +OPCODE(VoteAny, U1, U1, ) +OPCODE(VoteEqual, U1, U1, ) +OPCODE(SubgroupBallot, U32, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 27b12ff3c..c0e36a7e2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -417,10 +417,6 @@ void TranslatorVisitor::VMNMX(u64) { ThrowNotImplemented(Opcode::VMNMX); } -void TranslatorVisitor::VOTE(u64) { - ThrowNotImplemented(Opcode::VOTE); -} - void TranslatorVisitor::VOTE_vtg(u64) { ThrowNotImplemented(Opcode::VOTE_vtg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..a88894a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp @@ -0,0 +1,52 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class VoteOp : u64 { + ALL, + ANY, + EQ, +}; + +[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { + switch (vote_op) { + case VoteOp::ALL: + return ir.VoteAll(pred); + case VoteOp::ANY: + return ir.VoteAny(pred); + case VoteOp::EQ: + return ir.VoteEqual(pred); + default: + throw NotImplementedException("Invalid VOTE op {}", vote_op); + } +} + +void Vote(TranslatorVisitor& v, u64 insn) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 3, IR::Pred> pred_a; + BitField<42, 1, u64> neg_pred_a; + BitField<45, 3, IR::Pred> pred_b; + BitField<48, 2, VoteOp> vote_op; + } const vote{insn}; + + const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; + v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); + v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); +} +} // Anonymous namespace + +void TranslatorVisitor::VOTE(u64 insn) { + Vote(*this, insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f44eac5d8..db5138e4d 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -359,6 +359,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::VoteAll: + case IR::Opcode::VoteAny: + case IR::Opcode::VoteEqual: + case IR::Opcode::SubgroupBallot: + info.uses_subgroup_vote = true; + break; default: break; } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3181c79fb..b57cbc310 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -19,6 +19,8 @@ struct Profile { bool support_fp16_signed_zero_nan_preserve{}; bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; + bool support_vote{}; + bool warp_size_potentially_larger_than_guest{}; // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f97730b34..3d9f04d1a 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -80,6 +80,7 @@ struct Info { bool uses_sampled_1d{}; bool uses_sparse_residency{}; bool uses_demote_to_helper_invocation{}; + bool uses_subgroup_vote{}; IR::Type used_constant_buffer_types{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6684d37a6..8e544d745 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -36,13 +36,18 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, .stage{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = *spv_module, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 41fc9588f..bdbc8dd1e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -455,6 +455,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_vote = true, + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, }; } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f0e5b098c..009b74f12 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -737,6 +737,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { subgroup_properties.maxSubgroupSize >= GuestWarpSize) { extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; + ext_subgroup_size_control = true; } } else { is_warp_potentially_bigger = true; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 82bccc8f0..c268a4f8d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -193,6 +193,11 @@ public: return ext_shader_viewport_index_layer; } + /// Returns true if the device supports VK_EXT_subgroup_size_control. + bool IsExtSubgroupSizeControlSupported() const { + return ext_subgroup_size_control; + } + /// Returns true if the device supports VK_EXT_transform_feedback. bool IsExtTransformFeedbackSupported() const { return ext_transform_feedback; @@ -297,6 +302,7 @@ private: bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. -- cgit v1.2.3 From 68a9505d8a1d00c6ba2739bc0af3069cf87b9b84 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 24 Mar 2021 01:33:45 -0300 Subject: shader: Implement NDC [-1, 1], attribute types and default varying initialization --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 35 +++++++---- src/shader_recompiler/backend/spirv/emit_context.h | 10 +++- src/shader_recompiler/backend/spirv/emit_spirv.h | 2 + .../backend/spirv/emit_spirv_context_get_set.cpp | 69 ++++++++++++++-------- .../backend/spirv/emit_spirv_special.cpp | 35 +++++++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 8 +++ src/shader_recompiler/frontend/ir/ir_emitter.h | 3 + .../frontend/ir/microinstruction.cpp | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 4 ++ .../frontend/maxwell/structured_control_flow.cpp | 7 ++- src/shader_recompiler/profile.h | 13 ++++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 + .../renderer_vulkan/vk_pipeline_cache.cpp | 33 ++++++++++- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 +- 15 files changed, 186 insertions(+), 43 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_special.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 086bdf8d0..028e8b2d2 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -14,6 +14,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp backend/spirv/emit_spirv_select.cpp + backend/spirv/emit_spirv_special.cpp backend/spirv/emit_spirv_undefined.cpp backend/spirv/emit_spirv_vote.cpp environment.h diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 36f130781..ea46af244 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -67,6 +67,18 @@ Id DefineInput(EmitContext& ctx, Id type, std::optional builtin = Id DefineOutput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); } + +Id GetAttributeType(EmitContext& ctx, AttributeType type) { + switch (type) { + case AttributeType::Float: + return ctx.F32[4]; + case AttributeType::SignedInt: + return ctx.TypeVector(ctx.TypeInt(32, true), 4); + case AttributeType::UnsignedInt: + return ctx.U32[4]; + } + throw InvalidArgument("Invalid attribute type {}", type); +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -82,11 +94,11 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) - : Sirit::Module(0x00010000), profile{profile_} { + : Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} { AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); - DefineInterfaces(program.info, program.stage); + DefineInterfaces(program.info); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextures(program.info, binding); @@ -130,6 +142,9 @@ void EmitContext::DefineCommonTypes(const Info& info) { U32.Define(*this, TypeInt(32, false), "u32"); input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); + input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); if (info.uses_int8) { @@ -162,9 +177,9 @@ void EmitContext::DefineCommonConstants() { u32_zero_value = Constant(U32[1], 0U); } -void EmitContext::DefineInterfaces(const Info& info, Stage stage) { - DefineInputs(info, stage); - DefineOutputs(info, stage); +void EmitContext::DefineInterfaces(const Info& info) { + DefineInputs(info); + DefineOutputs(info); } void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { @@ -252,7 +267,7 @@ void EmitContext::DefineLabels(IR::Program& program) { } } -void EmitContext::DefineInputs(const Info& info, Stage stage) { +void EmitContext::DefineInputs(const Info& info) { if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], spv::BuiltIn::WorkgroupId); } @@ -288,8 +303,8 @@ void EmitContext::DefineInputs(const Info& info, Stage stage) { if (!info.loads_generics[index]) { continue; } - // FIXME: Declare size from input - const Id id{DefineInput(*this, F32[4])}; + const Id type{GetAttributeType(*this, profile.generic_input_types[index])}; + const Id id{DefineInput(*this, type)}; Decorate(id, spv::Decoration::Location, static_cast(index)); Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; @@ -323,8 +338,8 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions: } } -void EmitContext::DefineOutputs(const Info& info, Stage stage) { - if (info.stores_position) { +void EmitContext::DefineOutputs(const Info& info) { + if (info.stores_position || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); } for (size_t i = 0; i < info.stores_generics.size(); ++i) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 6e64360bf..5ed815c06 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -52,6 +52,7 @@ public: [[nodiscard]] Id Def(const IR::Value& value); const Profile& profile; + Stage stage{}; Id void_id{}; Id U1{}; @@ -72,6 +73,9 @@ public: UniformDefinitions uniform_types; Id input_f32{}; + Id input_u32{}; + Id input_s32{}; + Id output_f32{}; Id storage_u32{}; @@ -104,7 +108,7 @@ public: private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); - void DefineInterfaces(const Info& info, Stage stage); + void DefineInterfaces(const Info& info); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); @@ -113,8 +117,8 @@ private: void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, Id type, char type_char, u32 element_size); - void DefineInputs(const Info& info, Stage stage); - void DefineOutputs(const Info& info, Stage stage); + void DefineInputs(const Info& info); + void DefineOutputs(const Info& info); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index ce23200f2..7fefcf2f2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -28,6 +28,8 @@ void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); void EmitGetPred(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 052b84151..8fc040f8b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -2,30 +2,26 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "shader_recompiler/backend/spirv/emit_spirv.h" +#pragma optimize("", off) + namespace Shader::Backend::SPIRV { namespace { -Id InputAttrPointer(EmitContext& ctx, IR::Attribute attr) { - const u32 element{static_cast(attr) % 4}; - const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; - if (IR::IsGeneric(attr)) { - const u32 index{IR::GenericAttributeIndex(attr)}; - return ctx.OpAccessChain(ctx.input_f32, ctx.input_generics.at(index), element_id()); - } - switch (attr) { - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - return ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id()); - case IR::Attribute::InstanceId: - return ctx.OpLoad(ctx.U32[1], ctx.instance_id); - case IR::Attribute::VertexId: - return ctx.OpLoad(ctx.U32[1], ctx.vertex_id); - default: - throw NotImplementedException("Read attribute {}", attr); +std::tuple AttrTypes(EmitContext& ctx, u32 index) { + const bool is_first_reader{ctx.stage == Stage::VertexB}; + const AttributeType type{ctx.profile.generic_input_types.at(index)}; + switch (type) { + case AttributeType::Float: + return {ctx.input_f32, ctx.F32[1], false}; + case AttributeType::UnsignedInt: + return {ctx.input_u32, ctx.U32[1], true}; + case AttributeType::SignedInt: + return {ctx.input_s32, ctx.TypeInt(32, true), true}; } + throw InvalidArgument("Invalid attribute type {}", type); } Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { @@ -129,19 +125,40 @@ Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& o } Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { - if (!ctx.profile.support_vertex_instance_id) { - switch (attr) { - case IR::Attribute::InstanceId: + const u32 element{static_cast(attr) % 4}; + const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + const auto [pointer_type, type, needs_cast]{AttrTypes(ctx, index)}; + const Id generic_id{ctx.input_generics.at(index)}; + const Id pointer{ctx.OpAccessChain(pointer_type, generic_id, element_id())}; + const Id value{ctx.OpLoad(type, pointer)}; + return needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id())); + case IR::Attribute::InstanceId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpLoad(ctx.U32[1], ctx.instance_id); + } else { return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_index), ctx.OpLoad(ctx.U32[1], ctx.base_instance)); - case IR::Attribute::VertexId: + } + case IR::Attribute::VertexId: + if (ctx.profile.support_vertex_instance_id) { + return ctx.OpLoad(ctx.U32[1], ctx.vertex_id); + } else { return ctx.OpISub(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_index), ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); - default: - break; } + default: + throw NotImplementedException("Read attribute {}", attr); } - return ctx.OpLoad(ctx.F32[1], InputAttrPointer(ctx, attr)); } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp new file mode 100644 index 000000000..70ae7b51e --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -0,0 +1,35 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitPrologue(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB) { + const Id zero{ctx.Constant(ctx.F32[1], 0.0f)}; + const Id one{ctx.Constant(ctx.F32[1], 1.0f)}; + const Id null_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, zero)}; + ctx.OpStore(ctx.output_position, ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)); + for (const Id generic_id : ctx.output_generics) { + if (Sirit::ValidId(generic_id)) { + ctx.OpStore(generic_id, null_vector); + } + } + } +} + +void EmitEpilogue(EmitContext& ctx) { + if (ctx.profile.convert_depth_mode) { + const Id type{ctx.F32[1]}; + const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; + const Id z{ctx.OpCompositeExtract(type, position, 2u)}; + const Id w{ctx.OpCompositeExtract(type, position, 3u)}; + const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; + const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; + ctx.OpStore(ctx.output_position, vector); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ff2970125..ce610799a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -92,6 +92,14 @@ void IREmitter::DemoteToHelperInvocation(Block* continue_label) { Inst(Opcode::DemoteToHelperInvocation, continue_label); } +void IREmitter::Prologue() { + Inst(Opcode::Prologue); +} + +void IREmitter::Epilogue() { + Inst(Opcode::Epilogue); +} + U32 IREmitter::GetReg(IR::Reg reg) { return Inst(Opcode::GetRegister, reg); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 1708be3ef..39109b0de 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -39,6 +39,9 @@ public: void Return(); void DemoteToHelperInvocation(Block* continue_label); + void Prologue(); + void Epilogue(); + [[nodiscard]] U32 GetReg(IR::Reg reg); void SetReg(IR::Reg reg, const U32& value); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 21b7d8a9f..ba3968056 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -56,6 +56,8 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::SelectionMerge: case Opcode::Return: case Opcode::DemoteToHelperInvocation: + case Opcode::Prologue: + case Opcode::Epilogue: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: case Opcode::SetFragColor: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index fe888b8b2..8945c7b04 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -15,6 +15,10 @@ OPCODE(SelectionMerge, Void, Labe OPCODE(Return, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) +// Special operations +OPCODE(Prologue, Void, ) +OPCODE(Epilogue, Void, ) + // Context getters/setters OPCODE(GetRegister, U32, Reg, ) OPCODE(SetRegister, Void, Reg, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index cec03e73e..fdac1c95a 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -634,6 +634,9 @@ public: : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, block_list{block_list_} { Visit(root_stmt, nullptr, nullptr); + + IR::IREmitter ir{*block_list.front()}; + ir.Prologue(); } private: @@ -734,7 +737,9 @@ private: current_block = block_pool.Create(inst_pool); block_list.push_back(current_block); } - IR::IREmitter{*current_block}.Return(); + IR::IREmitter ir{*current_block}; + ir.Epilogue(); + ir.Return(); current_block = nullptr; break; } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index b57cbc310..41550bfc6 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -4,8 +4,18 @@ #pragma once +#include + +#include "common/common_types.h" + namespace Shader { +enum class AttributeType : u8 { + Float, + SignedInt, + UnsignedInt, +}; + struct Profile { bool unified_descriptor_binding{}; bool support_vertex_instance_id{}; @@ -24,6 +34,9 @@ struct Profile { // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; + + std::array generic_input_types{}; + bool convert_depth_mode{}; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a2ec418b1..a87ed1976 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -181,6 +181,9 @@ void GraphicsPipeline::Configure(bool is_indexed) { PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), *texture_cache, *update_descriptor_queue, index); } + if (!descriptor_set_layout) { + return; + } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index bdbc8dd1e..504b8c9d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -437,7 +437,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; - profile = Shader::Profile{ + base_profile = Shader::Profile{ .unified_descriptor_binding = true, .support_vertex_instance_id = false, .support_float_controls = true, @@ -458,6 +458,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, + .generic_input_types{}, }; } @@ -589,6 +590,7 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, Shader::Environment& env{*envs[env_index]}; ++env_index; + const Shader::Profile profile{MakeProfile(key, env.ShaderStage())}; const std::vector code{EmitSPIRV(profile, env, program, binding)}; modules[stage_index] = BuildShader(device, code); } @@ -645,9 +647,36 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(profile, env, program, binding)}; + std::vector code{EmitSPIRV(base_profile, env, program, binding)}; return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } +static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + switch (attr.Type()) { + case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: + return Shader::AttributeType::Float; + case Maxwell::VertexAttribute::Type::SignedInt: + return Shader::AttributeType::SignedInt; + case Maxwell::VertexAttribute::Type::UnsignedInt: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Float; +} + +Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, + Shader::Stage stage) { + Shader::Profile profile{base_profile}; + if (stage == Shader::Stage::VertexB) { + profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; + std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), + &CastAttributeType); + } + return profile; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index d481f56f9..e09d78063 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -156,6 +156,8 @@ private: ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env) const; + Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); + Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -176,7 +178,7 @@ private: ShaderPools main_pools; - Shader::Profile profile; + Shader::Profile base_profile; std::string pipeline_cache_filename; }; -- cgit v1.2.3 From 17063d16a3cfe6542e74265739191e1d018fc456 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 18:45:38 -0300 Subject: shader: Implement TXQ and fix FragDepth --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 8 +- src/shader_recompiler/backend/spirv/emit_context.h | 3 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 10 ++- src/shader_recompiler/backend/spirv/emit_spirv.h | 3 + .../backend/spirv/emit_spirv_image.cpp | 48 ++++++++++- src/shader_recompiler/environment.h | 3 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 6 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + .../maxwell/translate/impl/not_implemented.cpp | 8 -- .../maxwell/translate/impl/texture_query.cpp | 76 ++++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 3 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 20 ++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 92 ++++++++++++++++++++++ 15 files changed, 264 insertions(+), 21 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 5f3868bfe..7f8dc8eed 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -126,6 +126,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp frontend/maxwell/translate/impl/texture_gather_swizzled.cpp frontend/maxwell/translate/impl/texture_gather.cpp + frontend/maxwell/translate/impl/texture_query.cpp frontend/maxwell/translate/impl/vote.cpp frontend/maxwell/translate/impl/warp_shuffle.cpp frontend/maxwell/translate/translate.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 50793b5bf..c2d13f97c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -244,8 +244,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { if (desc.count != 1) { throw NotImplementedException("Array of textures"); } - const Id type{TypeSampledImage(ImageType(*this, desc))}; - const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, type)}; + const Id image_type{ImageType(*this, desc)}; + const Id sampled_type{TypeSampledImage(image_type)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); @@ -254,7 +255,8 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { // TODO: Pass count info textures.push_back(TextureDefinition{ .id{id}, - .type{type}, + .sampled_type{sampled_type}, + .image_type{image_type}, }); } binding += desc.count; diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 5ed815c06..0cb411a0e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -31,7 +31,8 @@ private: struct TextureDefinition { Id id; - Id type; + Id sampled_type; + Id image_type; }; struct UniformDefinitions { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index cee72f50d..4bed16e7b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -126,10 +126,10 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) { return main; } -void DefineEntryPoint(Environment& env, EmitContext& ctx, Id main) { +void DefineEntryPoint(Environment& env, const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; - switch (env.ShaderStage()) { + switch (program.stage) { case Shader::Stage::Compute: { const std::array workgroup_size{env.WorkgroupSize()}; execution_model = spv::ExecutionModel::GLCompute; @@ -143,6 +143,9 @@ void DefineEntryPoint(Environment& env, EmitContext& ctx, Id main) { case Shader::Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); + if (program.info.stores_frag_depth) { + ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); + } break; default: throw NotImplementedException("Stage {}", env.ShaderStage()); @@ -235,6 +238,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); + ctx.AddCapability(spv::Capability::ImageQuery); } Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { @@ -267,7 +271,7 @@ std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program u32& binding) { EmitContext ctx{profile, program, binding}; const Id main{DefineMain(ctx, program)}; - DefineEntryPoint(env, ctx, main); + DefineEntryPoint(env, program, ctx, main); if (profile.support_float_controls) { ctx.AddExtension("SPV_KHR_float_controls"); SetupDenormControl(profile, program, ctx, main); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4da1f3707..b82b16e9d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -343,6 +343,7 @@ Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); Id EmitBindlessImageGather(EmitContext&); Id EmitBindlessImageGatherDref(EmitContext&); Id EmitBindlessImageFetch(EmitContext&); +Id EmitBindlessImageQueryDimensions(EmitContext&); Id EmitBoundImageSampleImplicitLod(EmitContext&); Id EmitBoundImageSampleExplicitLod(EmitContext&); Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); @@ -350,6 +351,7 @@ Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); Id EmitBoundImageGather(EmitContext&); Id EmitBoundImageGatherDref(EmitContext&); Id EmitBoundImageFetch(EmitContext&); +Id EmitBoundImageQueryDimensions(EmitContext&); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -364,6 +366,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, const IR::Value& offset, const IR::Value& offset2, Id dref); Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index b6e9d3c0c..3ea0011aa 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -91,7 +91,15 @@ private: Id Texture(EmitContext& ctx, const IR::Value& index) { if (index.IsImmediate()) { const TextureDefinition def{ctx.textures.at(index.U32())}; - return ctx.OpLoad(def.type, def.id); + return ctx.OpLoad(def.sampled_type, def.id); + } + throw NotImplementedException("Indirect texture sample"); +} + +Id TextureImage(EmitContext& ctx, const IR::Value& index) { + if (index.IsImmediate()) { + const TextureDefinition def{ctx.textures.at(index.U32())}; + return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); } throw NotImplementedException("Indirect texture sample"); } @@ -149,6 +157,10 @@ Id EmitBindlessImageFetch(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBindlessImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitBoundImageSampleImplicitLod(EmitContext&) { throw LogicError("Unreachable instruction"); } @@ -177,6 +189,10 @@ Id EmitBoundImageFetch(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBoundImageQueryDimensions(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset) { const auto info{inst->Flags()}; @@ -241,4 +257,34 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c Texture(ctx, index), coords, operands.Mask(), operands.Span()); } +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { + const auto info{inst->Flags()}; + const Id image{TextureImage(ctx, index)}; + const Id zero{ctx.u32_zero_value}; + const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; + switch (info.type) { + case TextureType::Color1D: + case TextureType::Shadow1D: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), + zero, zero, mips()); + case TextureType::ColorArray1D: + case TextureType::Color2D: + case TextureType::ColorCube: + case TextureType::ShadowArray1D: + case TextureType::Shadow2D: + case TextureType::ShadowCube: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod), + zero, mips()); + case TextureType::ColorArray2D: + case TextureType::Color3D: + case TextureType::ColorArrayCube: + case TextureType::ShadowArray2D: + case TextureType::Shadow3D: + case TextureType::ShadowArrayCube: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), + mips()); + } + throw LogicError("Unspecified image type {}", info.type.Value()); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 6dec4b255..0c62c1c54 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -4,6 +4,7 @@ #include "common/common_types.h" #include "shader_recompiler/program_header.h" +#include "shader_recompiler/shader_info.h" #include "shader_recompiler/stage.h" namespace Shader { @@ -14,6 +15,8 @@ public: [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + [[nodiscard]] virtual TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) = 0; + [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; [[nodiscard]] virtual std::array WorkgroupSize() const = 0; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0296f8773..f281c023f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1493,6 +1493,12 @@ Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Valu return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); } +Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions + : Opcode::BindlessImageQueryDimensions}; + return Inst(op, handle, lod); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 446fd7785..771c186d4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -239,6 +239,7 @@ public: const F32& dref, const F32& lod, const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index e12b92c47..5d7462d76 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -356,6 +356,7 @@ OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BindlessImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -364,6 +365,7 @@ OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(BoundImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -372,6 +374,7 @@ OPCODE(ImageSampleDrefExplicitLod, F32, U32, OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) OPCODE(ImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) // Warp operations OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 788765c21..96ee2e741 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -373,14 +373,6 @@ void TranslatorVisitor::TXD_b(u64) { ThrowNotImplemented(Opcode::TXD_b); } -void TranslatorVisitor::TXQ(u64) { - ThrowNotImplemented(Opcode::TXQ); -} - -void TranslatorVisitor::TXQ_b(u64) { - ThrowNotImplemented(Opcode::TXQ_b); -} - void TranslatorVisitor::VABSDIFF(u64) { ThrowNotImplemented(Opcode::VABSDIFF); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp new file mode 100644 index 000000000..e8ea8faeb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -0,0 +1,76 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Dimension = 1, + TextureType = 2, + SamplePos = 5, +}; + +IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { + switch (mode) { + case Mode::Dimension: { + const IR::U32 lod{v.X(src_reg)}; + return v.ir.ImageQueryDimension(handle, lod); + } + case Mode::TextureType: + case Mode::SamplePos: + default: + throw NotImplementedException("Mode {}", mode); + } +} + +void Impl(TranslatorVisitor& v, u64 insn, std::optional cbuf_offset) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<22, 3, Mode> mode; + BitField<31, 4, u64> mask; + } const txq{insn}; + + IR::Reg src_reg{txq.src_reg}; + IR::U32 handle; + if (cbuf_offset) { + handle = v.ir.Imm32(*cbuf_offset); + } else { + handle = v.X(src_reg); + ++src_reg; + } + const IR::Value query{Query(v, handle, txq.mode, src_reg)}; + IR::Reg dest_reg{txq.dest_reg}; + for (int element = 0; element < 4; ++element) { + if (((txq.mask >> element) & 1) == 0) { + continue; + } + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)}); + ++dest_reg; + } +} +} // Anonymous namespace + +void TranslatorVisitor::TXQ(u64 insn) { + union { + u64 raw; + BitField<36, 13, u64> cbuf_offset; + } const txq{insn}; + + Impl(*this, insn, static_cast(txq.cbuf_offset)); +} + +void TranslatorVisitor::TXQ_b(u64 insn) { + Impl(*this, insn, std::nullopt); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6fe06fda8..80ca8db26 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -365,7 +365,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageSampleDrefImplicitLod: case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageGather: - case IR::Opcode::ImageGatherDref: { + case IR::Opcode::ImageGatherDref: + case IR::Opcode::ImageQueryDimensions: { const TextureType type{inst.Flags().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 0167dd06e..dfacf848f 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -54,6 +54,9 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BindlessImageFetch: case IR::Opcode::BoundImageFetch: return IR::Opcode::ImageFetch; + case IR::Opcode::BoundImageQueryDimensions: + case IR::Opcode::BindlessImageQueryDimensions: + return IR::Opcode::ImageQueryDimensions; default: return IR::Opcode::Void; } @@ -68,6 +71,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageGather: case IR::Opcode::BindlessImageGatherDref: case IR::Opcode::BindlessImageFetch: + case IR::Opcode::BindlessImageQueryDimensions: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -76,6 +80,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGather: case IR::Opcode::BoundImageGatherDref: case IR::Opcode::BoundImageFetch: + case IR::Opcode::BoundImageQueryDimensions: return false; default: throw InvalidArgument("Invalid opcode {}", inst.Opcode()); @@ -198,13 +203,20 @@ void TexturePass(Environment& env, IR::Program& program) { for (TextureInst& texture_inst : to_replace) { // TODO: Handle arrays IR::Inst* const inst{texture_inst.inst}; + inst->ReplaceOpcode(IndexedInstruction(*inst)); + + const auto& cbuf{texture_inst.cbuf}; + auto flags{inst->Flags()}; + if (inst->Opcode() == IR::Opcode::ImageQueryDimensions) { + flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); + inst->SetFlags(flags); + } const u32 index{descriptors.Add(TextureDescriptor{ - .type{inst->Flags().type}, - .cbuf_index{texture_inst.cbuf.index}, - .cbuf_offset{texture_inst.cbuf.offset}, + .type{flags.type}, + .cbuf_index{cbuf.index}, + .cbuf_offset{cbuf.offset}, .count{1}, })}; - inst->ReplaceOpcode(IndexedInstruction(*inst)); inst->SetArg(0, IR::Value{index}); } } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 504b8c9d6..30d424346 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -25,6 +25,7 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/pipeline_helper.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -45,6 +46,10 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } +u64 MakeCbufKey(u32 index, u32 offset) { + return (static_cast(index) << 32) | static_cast(offset); +} + class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; @@ -101,15 +106,21 @@ public: const auto data{std::make_unique(code_size)}; gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); + const u64 num_texture_types{static_cast(texture_types.size())}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) .write(reinterpret_cast(&read_highest), sizeof(read_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) .write(data.get(), code_size); + for (const auto [key, type] : texture_types) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); @@ -147,10 +158,47 @@ protected: return std::nullopt; } + Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, + GPUVAddr cbuf_addr, u32 cbuf_size, u32 cbuf_index, + u32 cbuf_offset) { + const u32 raw{cbuf_offset < cbuf_size ? gpu_memory->Read(cbuf_addr + cbuf_offset) : 0}; + const TextureHandle handle{raw, via_header_index}; + const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + + const Shader::TextureType result{[&] { + switch (entry.texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return Shader::TextureType::Color1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return Shader::TextureType::Color2D; + case Tegra::Texture::TextureType::Texture3D: + return Shader::TextureType::Color3D; + case Tegra::Texture::TextureType::TextureCubemap: + return Shader::TextureType::ColorCube; + case Tegra::Texture::TextureType::Texture1DArray: + return Shader::TextureType::ColorArray1D; + case Tegra::Texture::TextureType::Texture2DArray: + return Shader::TextureType::ColorArray2D; + case Tegra::Texture::TextureType::Texture1DBuffer: + throw Shader::NotImplementedException("Texture buffer"); + case Tegra::Texture::TextureType::TextureCubeArray: + return Shader::TextureType::ColorArrayCube; + default: + throw Shader::NotImplementedException("Unknown texture type"); + } + }()}; + texture_types.emplace(MakeCbufKey(cbuf_index, cbuf_offset), result); + return result; + } + Tegra::MemoryManager* gpu_memory{}; GPUVAddr program_base{}; std::vector code; + std::unordered_map texture_types; u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -176,29 +224,45 @@ public: switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; + stage_index = 0; break; case Maxwell::ShaderProgram::VertexB: stage = Shader::Stage::VertexB; + stage_index = 0; break; case Maxwell::ShaderProgram::TesselationControl: stage = Shader::Stage::TessellationControl; + stage_index = 1; break; case Maxwell::ShaderProgram::TesselationEval: stage = Shader::Stage::TessellationEval; + stage_index = 2; break; case Maxwell::ShaderProgram::Geometry: stage = Shader::Stage::Geometry; + stage_index = 3; break; case Maxwell::ShaderProgram::Fragment: stage = Shader::Stage::Fragment; + stage_index = 4; break; default: UNREACHABLE_MSG("Invalid program={}", program); + break; } } ~GraphicsEnvironment() override = default; + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto& regs{maxwell3d->regs}; + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, + cbuf.address, cbuf.size, cbuf_index, cbuf_offset); + } + u32 TextureBoundBuffer() const override { return maxwell3d->regs.tex_cb_index; } @@ -209,6 +273,7 @@ public: private: Tegra::Engines::Maxwell3D* maxwell3d{}; + size_t stage_index{}; }; class ComputeEnvironment final : public GenericEnvironment { @@ -224,6 +289,15 @@ public: ~ComputeEnvironment() override = default; + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, + cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); + } + u32 TextureBoundBuffer() const override { return kepler_compute->regs.tex_cb_index; } @@ -278,7 +352,9 @@ class FileEnvironment final : public Shader::Environment { public: void Deserialize(std::ifstream& file) { u64 code_size{}; + u64 num_texture_types{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -286,6 +362,13 @@ public: .read(reinterpret_cast(&stage), sizeof(stage)); code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); file.read(reinterpret_cast(code.get()), code_size); + for (size_t i = 0; i < num_texture_types; ++i) { + u64 key; + Shader::TextureType type; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&type), sizeof(type)); + texture_types.emplace(key, type); + } if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); } else { @@ -300,6 +383,14 @@ public: return code[(address - read_lowest) / sizeof(u64)]; } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == texture_types.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; + } + u32 TextureBoundBuffer() const override { return texture_bound; } @@ -310,6 +401,7 @@ public: private: std::unique_ptr code; + std::unordered_map texture_types; std::array workgroup_size{}; u32 texture_bound{}; u32 read_lowest{}; -- cgit v1.2.3 From ec005be99d4f231f6d4d812841c84ab4af4204a6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 26 Mar 2021 18:55:07 -0300 Subject: shader: Fix rasterizer integration order issues --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 7 +++++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 - src/video_core/renderer_vulkan/vk_render_pass_cache.cpp | 5 +---- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a87ed1976..82536b9d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -139,7 +139,6 @@ void GraphicsPipeline::Configure(bool is_indexed) { static_vector samplers; texture_cache->SynchronizeGraphicsDescriptors(); - texture_cache->UpdateRenderTargets(false); const auto& regs{maxwell3d->regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; @@ -181,13 +180,17 @@ void GraphicsPipeline::Configure(bool is_indexed) { PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), *texture_cache, *update_descriptor_queue, index); } + texture_cache->UpdateRenderTargets(false); + scheduler->RequestRenderpass(texture_cache->GetFramebuffer()); + + scheduler->BindGraphicsPipeline(*pipeline); + if (!descriptor_set_layout) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); - scheduler->BindGraphicsPipeline(*pipeline); scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8f63a7591..d7d9927dd 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -178,7 +178,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { BeginTransformFeedback(); - scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); UpdateDynamicStates(); const auto& regs{maxwell3d.regs}; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 1c6ba7289..b2dcd74ab 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -56,15 +56,12 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { return *pair->second; } boost::container::static_vector descriptions; - u32 num_images{0}; - for (size_t index = 0; index < key.color_formats.size(); ++index) { const PixelFormat format{key.color_formats[index]}; if (format == PixelFormat::Invalid) { continue; } descriptions.push_back(AttachmentDescription(*device, format, key.samples)); - ++num_images; } const size_t num_colors{descriptions.size()}; const VkAttachmentReference* depth_attachment{}; @@ -89,7 +86,7 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .pNext = nullptr, .flags = 0, .attachmentCount = static_cast(descriptions.size()), - .pAttachments = descriptions.data(), + .pAttachments = descriptions.empty() ? nullptr : descriptions.data(), .subpassCount = 1, .pSubpasses = &subpass, .dependencyCount = 0, -- cgit v1.2.3 From cb6039ccea77d35fb829c337fd61451f549e3453 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 02:56:09 -0300 Subject: vk_pipeline_cache: Fix pipeline and shader caches --- .../renderer_vulkan/vk_pipeline_cache.cpp | 26 +++++++++++++++++----- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 1 + 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30d424346..51c155077 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -62,7 +62,7 @@ public: ~GenericEnvironment() override = default; std::optional Analyze() { - const std::optional size{TryFindSize(start_address)}; + const std::optional size{TryFindSize()}; if (!size) { return std::nullopt; } @@ -71,6 +71,13 @@ public: return Common::CityHash128(reinterpret_cast(code.data()), code.size()); } + void SetCachedSize(size_t size_bytes) { + cached_lowest = start_address; + cached_highest = start_address + static_cast(size_bytes); + code.resize(CachedSize()); + gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); + } + [[nodiscard]] size_t CachedSize() const noexcept { return cached_highest - cached_lowest + INST_SIZE; } @@ -80,7 +87,7 @@ public: } [[nodiscard]] bool CanBeSerialized() const noexcept { - return has_unbound_instructions; + return !has_unbound_instructions; } [[nodiscard]] u128 CalculateHash() const { @@ -95,7 +102,7 @@ public: read_highest = std::max(read_highest, address); if (address >= cached_lowest && address < cached_highest) { - return code[address / INST_SIZE]; + return code[(address - cached_lowest) / INST_SIZE]; } has_unbound_instructions = true; return gpu_memory->Read(program_base + address); @@ -117,30 +124,34 @@ public: .write(reinterpret_cast(&read_highest), sizeof(read_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) .write(data.get(), code_size); + file.flush(); for (const auto [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } + file.flush(); if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); } + file.flush(); } protected: static constexpr size_t INST_SIZE = sizeof(u64); - std::optional TryFindSize(GPUVAddr guest_addr) { + std::optional TryFindSize() { constexpr size_t BLOCK_SIZE = 0x1000; constexpr size_t MAXIMUM_SIZE = 0x100000; constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - size_t offset = 0; - size_t size = BLOCK_SIZE; + GPUVAddr guest_addr{program_base + start_address}; + size_t offset{0}; + size_t size{BLOCK_SIZE}; while (size <= MAXIMUM_SIZE) { code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; @@ -623,6 +634,7 @@ bool PipelineCache::RefreshStages() { GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; shader_info = MakeShaderInfo(env, *cpu_shader_addr); } + shader_infos[index] = shader_info; graphics_key.unique_hashes[index] = shader_info->unique_hash; } return true; @@ -707,6 +719,8 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { GraphicsEnvironment& env{graphics_envs[index]}; const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + env.SetCachedSize(shader_infos[index]->size_bytes); + generic_envs.push_back(&env); envs.push_back(&env); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e09d78063..b55e14189 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -172,6 +172,7 @@ private: TextureCache& texture_cache; GraphicsPipelineCacheKey graphics_key{}; + std::array shader_infos{}; std::unordered_map compute_cache; std::unordered_map graphics_cache; -- cgit v1.2.3 From 675a82416d7775dc7a252a5d8f5b704e6b8f2326 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 03:08:31 -0300 Subject: spirv: Remove dependencies on Environment when generating SPIR-V --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 11 +++++------ src/shader_recompiler/backend/spirv/emit_spirv.h | 5 ++--- src/shader_recompiler/frontend/ir/program.h | 2 ++ src/shader_recompiler/frontend/maxwell/program.cpp | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 10 +++------- 5 files changed, 15 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 4bed16e7b..2e7e6bb0c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -126,12 +126,12 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) { return main; } -void DefineEntryPoint(Environment& env, const IR::Program& program, EmitContext& ctx, Id main) { +void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; switch (program.stage) { case Shader::Stage::Compute: { - const std::array workgroup_size{env.WorkgroupSize()}; + const std::array workgroup_size{program.workgroup_size}; execution_model = spv::ExecutionModel::GLCompute; ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); @@ -148,7 +148,7 @@ void DefineEntryPoint(Environment& env, const IR::Program& program, EmitContext& } break; default: - throw NotImplementedException("Stage {}", env.ShaderStage()); + throw NotImplementedException("Stage {}", program.stage); } ctx.AddEntryPoint(execution_model, main, "main", interfaces); } @@ -267,11 +267,10 @@ Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program, - u32& binding) { +std::vector EmitSPIRV(const Profile& profile, IR::Program& program, u32& binding) { EmitContext ctx{profile, program, binding}; const Id main{DefineMain(ctx, program)}; - DefineEntryPoint(env, program, ctx, main); + DefineEntryPoint(program, ctx, main); if (profile.support_float_controls) { ctx.AddExtension("SPV_KHR_float_controls"); SetupDenormControl(profile, program, ctx, main); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index b82b16e9d..837f0e858 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -8,15 +8,14 @@ #include "common/common_types.h" #include "shader_recompiler/backend/spirv/emit_context.h" -#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, Environment& env, - IR::Program& program, u32& binding); +[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, IR::Program& program, + u32& binding); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 733513c8b..0162e919c 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include @@ -19,6 +20,7 @@ struct Program { BlockList post_order_blocks; Info info; Stage stage{}; + std::array workgroup_size{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 0074eb89b..6efaf6ee0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -33,6 +33,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool modules; u32 binding{0}; - env_index = 0; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == u128{}) { continue; @@ -691,11 +690,8 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - Shader::Environment& env{*envs[env_index]}; - ++env_index; - - const Shader::Profile profile{MakeProfile(key, env.ShaderStage())}; - const std::vector code{EmitSPIRV(profile, env, program, binding)}; + const Shader::Profile profile{MakeProfile(key, program.stage)}; + const std::vector code{EmitSPIRV(profile, program, binding)}; modules[stage_index] = BuildShader(device, code); } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, @@ -753,7 +749,7 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(base_profile, env, program, binding)}; + std::vector code{EmitSPIRV(base_profile, program, binding)}; return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, BuildShader(device, code)}; } -- cgit v1.2.3 From dbd882ddeb1a1a9233c0085d0b8ccb022db385b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Mar 2021 04:59:58 -0300 Subject: shader: Better interpolation and disabled attributes support --- .../backend/spirv/emit_context.cpp | 29 ++++++++++++++++-- .../backend/spirv/emit_spirv_context_get_set.cpp | 29 ++++++++++++------ src/shader_recompiler/frontend/maxwell/program.cpp | 35 ++++++++++++++++++++++ .../translate/impl/load_store_attribute.cpp | 10 +------ .../ir_opt/collect_shader_info_pass.cpp | 2 +- src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 13 +++++++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 3 ++ 9 files changed, 101 insertions(+), 25 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4d5dabcbf..a8ca33c1d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -76,6 +76,8 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) { return ctx.TypeVector(ctx.TypeInt(32, true), 4); case AttributeType::UnsignedInt: return ctx.U32[4]; + case AttributeType::Disabled: + break; } throw InvalidArgument("Invalid attribute type {}", type); } @@ -305,15 +307,36 @@ void EmitContext::DefineInputs(const Info& info) { if (info.loads_front_face) { front_face = DefineInput(*this, U1, spv::BuiltIn::FrontFacing); } - for (size_t index = 0; index < info.loads_generics.size(); ++index) { - if (!info.loads_generics[index]) { + for (size_t index = 0; index < info.input_generics.size(); ++index) { + const InputVarying generic{info.input_generics[index]}; + if (!generic.used) { continue; } - const Id type{GetAttributeType(*this, profile.generic_input_types[index])}; + const AttributeType input_type{profile.generic_input_types[index]}; + if (input_type == AttributeType::Disabled) { + continue; + } + const Id type{GetAttributeType(*this, input_type)}; const Id id{DefineInput(*this, type)}; Decorate(id, spv::Decoration::Location, static_cast(index)); Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; + + if (stage != Stage::Fragment) { + continue; + } + switch (generic.interpolation) { + case Interpolation::Smooth: + // Default + // Decorate(id, spv::Decoration::Smooth); + break; + case Interpolation::NoPerspective: + Decorate(id, spv::Decoration::NoPerspective); + break; + case Interpolation::Flat: + Decorate(id, spv::Decoration::Flat); + break; + } } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 6fa16eb80..4cbc2aec1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -10,16 +10,23 @@ namespace Shader::Backend::SPIRV { namespace { -std::tuple AttrTypes(EmitContext& ctx, u32 index) { - const bool is_first_reader{ctx.stage == Stage::VertexB}; +struct AttrInfo { + Id pointer; + Id id; + bool needs_cast; +}; + +std::optional AttrTypes(EmitContext& ctx, u32 index) { const AttributeType type{ctx.profile.generic_input_types.at(index)}; switch (type) { case AttributeType::Float: - return {ctx.input_f32, ctx.F32[1], false}; + return AttrInfo{ctx.input_f32, ctx.F32[1], false}; case AttributeType::UnsignedInt: - return {ctx.input_u32, ctx.U32[1], true}; + return AttrInfo{ctx.input_u32, ctx.U32[1], true}; case AttributeType::SignedInt: - return {ctx.input_s32, ctx.TypeInt(32, true), true}; + return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; + case AttributeType::Disabled: + return std::nullopt; } throw InvalidArgument("Invalid attribute type {}", type); } @@ -129,11 +136,15 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - const auto [pointer_type, type, needs_cast]{AttrTypes(ctx, index)}; + const std::optional type{AttrTypes(ctx, index)}; + if (!type) { + // Attribute is disabled + return ctx.Constant(ctx.F32[1], 0.0f); + } const Id generic_id{ctx.input_generics.at(index)}; - const Id pointer{ctx.OpAccessChain(pointer_type, generic_id, element_id())}; - const Id value{ctx.OpLoad(type, pointer)}; - return needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; + const Id pointer{ctx.OpAccessChain(type->pointer, generic_id, element_id())}; + const Id value{ctx.OpLoad(type->id, pointer)}; + return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } switch (attr) { case IR::Attribute::PositionX: diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 6efaf6ee0..a914a91f4 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -27,6 +27,40 @@ static void RemoveUnreachableBlocks(IR::Program& program) { }); } +static void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& sph{env.SPH()}; + for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + std::optional imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { + if (value == PixelImap::Unused) { + continue; + } + if (imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + continue; + } + program.info.input_generics[index].interpolation = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { IR::Program program; @@ -51,6 +85,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool loads_generics{}; + std::array input_generics{}; bool loads_position{}; bool loads_instance_id{}; bool loads_vertex_id{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 82536b9d6..278509bf0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -221,10 +221,10 @@ void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineSta } } static_vector vertex_attributes; - const auto& input_attributes = stage_infos[0].loads_generics; + const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < state.attributes.size(); ++index) { const auto& attribute = state.attributes[index]; - if (!attribute.enabled || !input_attributes[index]) { + if (!attribute.enabled || !input_attributes[index].used) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 251559b16..69dd945b2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -755,6 +755,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + if (attr.enabled == 0) { + return Shader::AttributeType::Disabled; + } switch (attr.Type()) { case Maxwell::VertexAttribute::Type::SignedNorm: case Maxwell::VertexAttribute::Type::UnsignedNorm: -- cgit v1.2.3 From e860870dd2244cd87645190c89244f1d2c4c775b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Mar 2021 19:53:34 -0300 Subject: shader: Implement LDS, STS, LDL, and STS and use SPIR-V 1.4 when available --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 115 +++++++++++- src/shader_recompiler/backend/spirv/emit_context.h | 21 +++ src/shader_recompiler/backend/spirv/emit_spirv.h | 14 ++ .../backend/spirv/emit_spirv_context_get_set.cpp | 10 ++ .../backend/spirv/emit_spirv_shared_memory.cpp | 175 ++++++++++++++++++ src/shader_recompiler/environment.h | 4 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 46 +++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 6 + .../frontend/ir/microinstruction.cpp | 6 + src/shader_recompiler/frontend/ir/opcodes.inc | 18 ++ src/shader_recompiler/frontend/ir/program.h | 2 + src/shader_recompiler/frontend/maxwell/program.cpp | 2 + .../translate/impl/load_store_local_shared.cpp | 197 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 -- .../ir_opt/collect_shader_info_pass.cpp | 6 + src/shader_recompiler/profile.h | 3 + .../renderer_vulkan/vk_pipeline_cache.cpp | 47 ++++- src/video_core/vulkan_common/vulkan_device.cpp | 34 ++++ src/video_core/vulkan_common/vulkan_device.h | 42 +++-- 20 files changed, 730 insertions(+), 36 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 55b846c84..003cbefb1 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -14,6 +14,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp backend/spirv/emit_spirv_select.cpp + backend/spirv/emit_spirv_shared_memory.cpp backend/spirv/emit_spirv_special.cpp backend/spirv/emit_spirv_undefined.cpp backend/spirv/emit_spirv_warp.cpp @@ -111,6 +112,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/load_constant.cpp frontend/maxwell/translate/impl/load_effective_address.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp + frontend/maxwell/translate/impl/load_store_local_shared.cpp frontend/maxwell/translate/impl/load_store_memory.cpp frontend/maxwell/translate/impl/logic_operation.cpp frontend/maxwell/translate/impl/logic_operation_three_input.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index a8ca33c1d..96d0e9b4d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -9,6 +9,7 @@ #include #include "common/common_types.h" +#include "common/div_ceil.h" #include "shader_recompiler/backend/spirv/emit_context.h" namespace Shader::Backend::SPIRV { @@ -96,11 +97,13 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) - : Sirit::Module(0x00010000), profile{profile_}, stage{program.stage} { + : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); DefineInterfaces(program.info); + DefineLocalMemory(program); + DefineSharedMemory(program); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextures(program.info, binding); @@ -143,6 +146,8 @@ void EmitContext::DefineCommonTypes(const Info& info) { F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); + private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32"); + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); @@ -184,6 +189,105 @@ void EmitContext::DefineInterfaces(const Info& info) { DefineOutputs(info); } +void EmitContext::DefineLocalMemory(const IR::Program& program) { + if (program.local_memory_size == 0) { + return; + } + const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)}; + const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; + const Id pointer{TypePointer(spv::StorageClass::Private, type)}; + local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(local_memory); + } +} + +void EmitContext::DefineSharedMemory(const IR::Program& program) { + if (program.shared_memory_size == 0) { + return; + } + const auto make{[&](Id element_type, u32 element_size) { + const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)}; + const Id array_type{TypeArray(element_type, Constant(U32[1], num_elements))}; + Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{TypeStruct(array_type)}; + MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U); + Decorate(struct_type, spv::Decoration::Block); + + const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; + const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; + const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; + Decorate(variable, spv::Decoration::Aliased); + interfaces.push_back(variable); + + return std::make_pair(variable, element_pointer); + }}; + if (profile.support_explicit_workgroup_layout) { + AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); + if (program.info.uses_int8) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); + std::tie(shared_memory_u8, shared_u8) = make(U8, 1); + } + if (program.info.uses_int16) { + AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); + std::tie(shared_memory_u16, shared_u16) = make(U16, 2); + } + std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4); + std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8); + std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16); + } + const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; + const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; + const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)}; + shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); + shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup); + interfaces.push_back(shared_memory_u32); + + const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; + const auto make_function{[&](u32 mask, u32 size) { + const Id loop_header{OpLabel()}; + const Id continue_block{OpLabel()}; + const Id merge_block{OpLabel()}; + + const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; + const Id offset{OpFunctionParameter(U32[1])}; + const Id insert_value{OpFunctionParameter(U32[1])}; + AddLabel(); + OpBranch(loop_header); + + AddLabel(loop_header); + const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; + const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Constant(U32[1], 3U))}; + const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Constant(U32[1], mask))}; + const Id count{Constant(U32[1], size)}; + OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + OpBranch(continue_block); + + AddLabel(continue_block); + const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; + const Id old_value{OpLoad(U32[1], word_pointer)}; + const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Constant(U32[1], 1U), + u32_zero_value, u32_zero_value, new_value, + old_value)}; + const Id success{OpIEqual(U1, atomic_res, old_value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturn(); + OpFunctionEnd(); + return func; + }}; + if (program.info.uses_int8) { + shared_store_u8_func = make_function(24, 8); + } + if (program.info.uses_int16) { + shared_store_u16_func = make_function(16, 16); + } +} + void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; @@ -234,6 +338,9 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("ssbo{}", index)); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } std::fill_n(ssbos.data() + index, desc.count, id); index += desc.count; binding += desc.count; @@ -261,6 +368,9 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { .image_type{image_type}, }); } + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } binding += desc.count; } } @@ -363,6 +473,9 @@ void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions: for (size_t i = 0; i < desc.count; ++i) { cbufs[desc.index + i].*member_type = id; } + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } binding += desc.count; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 01b7b665d..1a4e8221a 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -73,6 +73,14 @@ public: UniformDefinitions uniform_types; + Id private_u32{}; + + Id shared_u8{}; + Id shared_u16{}; + Id shared_u32{}; + Id shared_u32x2{}; + Id shared_u32x4{}; + Id input_f32{}; Id input_u32{}; Id input_s32{}; @@ -96,6 +104,17 @@ public: Id base_vertex{}; Id front_face{}; + Id local_memory{}; + + Id shared_memory_u8{}; + Id shared_memory_u16{}; + Id shared_memory_u32{}; + Id shared_memory_u32x2{}; + Id shared_memory_u32x4{}; + + Id shared_store_u8_func{}; + Id shared_store_u16_func{}; + Id input_position{}; std::array input_generics{}; @@ -111,6 +130,8 @@ private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineInterfaces(const Info& info); + void DefineLocalMemory(const IR::Program& program); + void DefineSharedMemory(const IR::Program& program); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 837f0e858..4f62af959 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -58,6 +58,8 @@ void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitLoadLocal(EmitContext& ctx, Id word_offset); +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); Id EmitUndefU1(EmitContext& ctx); Id EmitUndefU8(EmitContext& ctx); Id EmitUndefU16(EmitContext& ctx); @@ -94,6 +96,18 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va Id value); void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); +Id EmitLoadSharedU8(EmitContext& ctx, Id offset); +Id EmitLoadSharedS8(EmitContext& ctx, Id offset); +Id EmitLoadSharedU16(EmitContext& ctx, Id offset); +Id EmitLoadSharedS16(EmitContext& ctx, Id offset); +Id EmitLoadSharedU32(EmitContext& ctx, Id offset); +Id EmitLoadSharedU64(EmitContext& ctx, Id offset); +Id EmitLoadSharedU128(EmitContext& ctx, Id offset); +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 4cbc2aec1..52dcef8a4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -238,4 +238,14 @@ Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } +Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { + const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; + return ctx.OpLoad(ctx.U32[1], pointer); +} + +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) { + const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; + ctx.OpStore(pointer, value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp new file mode 100644 index 000000000..fa2fc9ab4 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -0,0 +1,175 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { + const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); +} + +Id Word(EmitContext& ctx, Id offset) { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + return ctx.OpLoad(ctx.U32[1], pointer); +} + +std::pair ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { + const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))}; + const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))}; + const Id count_id{ctx.Constant(ctx.U32[1], count)}; + return {bit, count_id}; +} +} // Anonymous namespace + +Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); + } else { + const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); + } +} + +Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; + return ctx.OpLoad(ctx.U32[1], pointer); + } else { + return Word(ctx, offset); + } +} + +Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; + return ctx.OpLoad(ctx.U32[2], pointer); + } else { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; + return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), + ctx.OpLoad(ctx.U32[1], rhs_pointer)); + } +} + +Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; + return ctx.OpLoad(ctx.U32[4], pointer); + } + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + std::array values{}; + for (u32 i = 0; i < 4; ++i) { + const Id index{i == 0 ? base_index + : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + values[i] = ctx.OpLoad(ctx.U32[1], pointer); + } + return ctx.OpCompositeConstruct(ctx.U32[4], values); +} + +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value); + } +} + +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; + ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); + } else { + ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value); + } +} + +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { + Id pointer{}; + if (ctx.profile.support_explicit_workgroup_layout) { + pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); + } else { + const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); + } + ctx.OpStore(pointer, value); +} + +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))}; + const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; + const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; + ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); + ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); +} + +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_explicit_workgroup_layout) { + const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; + ctx.OpStore(pointer, value); + return; + } + const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + for (u32 i = 0; i < 4; ++i) { + const Id index{i == 0 ? base_index + : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 0c62c1c54..9415d02f6 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -19,6 +19,10 @@ public: [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; + [[nodiscard]] virtual u32 LocalMemorySize() const = 0; + + [[nodiscard]] virtual u32 SharedMemorySize() const = 0; + [[nodiscard]] virtual std::array WorkgroupSize() const = 0; [[nodiscard]] const ProgramHeader& SPH() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6d41442ee..d6a1d8ec2 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,52 @@ void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) { Inst(Opcode::WriteGlobal128, address, vector); } +U32 IREmitter::LoadLocal(const IR::U32& word_offset) { + return Inst(Opcode::LoadLocal, word_offset); +} + +void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) { + Inst(Opcode::WriteLocal, word_offset, value); +} + +Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) { + switch (bit_size) { + case 8: + return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); + case 16: + return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); + case 32: + return Inst(Opcode::LoadSharedU32, offset); + case 64: + return Inst(Opcode::LoadSharedU64, offset); + case 128: + return Inst(Opcode::LoadSharedU128, offset); + } + throw InvalidArgument("Invalid bit size {}", bit_size); +} + +void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) { + switch (bit_size) { + case 8: + Inst(Opcode::WriteSharedU8, offset, value); + break; + case 16: + Inst(Opcode::WriteSharedU16, offset, value); + break; + case 32: + Inst(Opcode::WriteSharedU32, offset, value); + break; + case 64: + Inst(Opcode::WriteSharedU64, offset, value); + break; + case 128: + Inst(Opcode::WriteSharedU128, offset, value); + break; + default: + throw InvalidArgument("Invalid bit size {}", bit_size); + } +} + U1 IREmitter::GetZeroFromOp(const Value& op) { return Inst(Opcode::GetZeroFromOp, op); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8d50aa607..842c2bdaf 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -99,6 +99,12 @@ public: void WriteGlobal64(const U64& address, const IR::Value& vector); void WriteGlobal128(const U64& address, const IR::Value& vector); + [[nodiscard]] U32 LoadLocal(const U32& word_offset); + void WriteLocal(const U32& word_offset, const U32& value); + + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); + void WriteShared(int bit_size, const U32& offset, const Value& value); + [[nodiscard]] U1 GetZeroFromOp(const Value& op); [[nodiscard]] U1 GetSignFromOp(const Value& op); [[nodiscard]] U1 GetCarryFromOp(const Value& op); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index be8eb4d4c..52a5e5034 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -76,6 +76,12 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteStorage32: case Opcode::WriteStorage64: case Opcode::WriteStorage128: + case Opcode::WriteLocal: + case Opcode::WriteSharedU8: + case Opcode::WriteSharedU16: + case Opcode::WriteSharedU32: + case Opcode::WriteSharedU64: + case Opcode::WriteSharedU128: return true; default: return false; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 5d7462d76..c75658328 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -89,6 +89,24 @@ OPCODE(WriteStorage32, Void, U32, OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) +// Local memory operations +OPCODE(LoadLocal, U32, U32, ) +OPCODE(WriteLocal, Void, U32, U32, ) + +// Shared memory operations +OPCODE(LoadSharedU8, U32, U32, ) +OPCODE(LoadSharedS8, U32, U32, ) +OPCODE(LoadSharedU16, U32, U32, ) +OPCODE(LoadSharedS16, U32, U32, ) +OPCODE(LoadSharedU32, U32, U32, ) +OPCODE(LoadSharedU64, U32x2, U32, ) +OPCODE(LoadSharedU128, U32x4, U32, ) +OPCODE(WriteSharedU8, Void, U32, U32, ) +OPCODE(WriteSharedU16, Void, U32, U32, ) +OPCODE(WriteSharedU32, Void, U32, U32, ) +OPCODE(WriteSharedU64, Void, U32, U32x2, ) +OPCODE(WriteSharedU128, Void, U32, U32x4, ) + // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 0162e919c..3a37b3ab9 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -21,6 +21,8 @@ struct Program { Info info; Stage stage{}; std::array workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index a914a91f4..7b08f11b0 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -67,8 +67,10 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool offset_reg; + BitField<20, 24, u64> absolute_offset; + BitField<20, 24, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast(encoding.absolute_offset)); + } else { + const s32 relative{static_cast(encoding.relative_offset.Value())}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +std::pair GetSize(u64 insn) { + union { + u64 raw; + BitField<48, 3, Size> size; + } const encoding{insn}; + + const Size nnn = encoding.size; + switch (encoding.size) { + case Size::U8: + return {8, false}; + case Size::S8: + return {8, true}; + case Size::U16: + return {16, false}; + case Size::S16: + return {16, true}; + case Size::B32: + return {32, false}; + case Size::B64: + return {64, false}; + case Size::B128: + return {128, false}; + default: + throw NotImplementedException("Invalid size {}", encoding.size.Value()); + } +} + +IR::Reg Reg(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> reg; + } const encoding{insn}; + + return encoding.reg; +} + +IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); +} + +IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { + return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); +} +} // Anonymous namespace + +void TranslatorVisitor::LDL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed)); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed)); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + X(dest, ir.LoadLocal(word_offset)); + for (int i = 1; i < bit_size / 32; ++i) { + X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i)))); + } + break; + } +} + +void TranslatorVisitor::LDS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg dest{Reg(insn)}; + const auto [bit_size, is_signed]{GetSize(insn)}; + const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; + switch (bit_size) { + case 8: + case 16: + case 32: + X(dest, IR::U32{value}); + break; + case 64: + case 128: + if (!IR::IsAligned(dest, bit_size / 32)) { + throw NotImplementedException("Unaligned destination register {}", dest); + } + for (int element = 0; element < bit_size / 32; ++element) { + X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + } + break; + } +} + +void TranslatorVisitor::STL(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))}; + + const IR::Reg reg{Reg(insn)}; + const IR::U32 src{X(reg)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: { + const IR::U32 bit{ByteOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; + ir.WriteLocal(word_offset, value); + break; + } + case 16: { + const IR::U32 bit{ShortOffset(ir, offset)}; + const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; + ir.WriteLocal(word_offset, value); + break; + } + case 32: + case 64: + case 128: + if (!IR::IsAligned(reg, bit_size / 32)) { + throw NotImplementedException("Unaligned source register"); + } + ir.WriteLocal(word_offset, src); + for (int i = 1; i < bit_size / 32; ++i) { + ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); + } + break; + } +} + +void TranslatorVisitor::STS(u64 insn) { + const IR::U32 offset{Offset(*this, insn)}; + const IR::Reg reg{Reg(insn)}; + const int bit_size{GetSize(insn).first}; + switch (bit_size) { + case 8: + case 16: + case 32: + ir.WriteShared(bit_size, offset, X(reg)); + break; + case 64: + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); + break; + case 128: { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; + ir.WriteShared(128, offset, vector); + break; + } + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 409216640..b62d8ee2a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -193,14 +193,6 @@ void TranslatorVisitor::LD(u64) { ThrowNotImplemented(Opcode::LD); } -void TranslatorVisitor::LDL(u64) { - ThrowNotImplemented(Opcode::LDL); -} - -void TranslatorVisitor::LDS(u64) { - ThrowNotImplemented(Opcode::LDS); -} - void TranslatorVisitor::LEPC(u64) { ThrowNotImplemented(Opcode::LEPC); } @@ -309,18 +301,10 @@ void TranslatorVisitor::ST(u64) { ThrowNotImplemented(Opcode::ST); } -void TranslatorVisitor::STL(u64) { - ThrowNotImplemented(Opcode::STL); -} - void TranslatorVisitor::STP(u64) { ThrowNotImplemented(Opcode::STP); } -void TranslatorVisitor::STS(u64) { - ThrowNotImplemented(Opcode::STS); -} - void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 60be67228..c932c307b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -200,6 +200,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorageS8: case IR::Opcode::WriteStorageU8: case IR::Opcode::WriteStorageS8: + case IR::Opcode::LoadSharedU8: + case IR::Opcode::LoadSharedS8: + case IR::Opcode::WriteSharedU8: case IR::Opcode::SelectU8: case IR::Opcode::ConvertF16S8: case IR::Opcode::ConvertF16U8: @@ -224,6 +227,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LoadStorageS16: case IR::Opcode::WriteStorageU16: case IR::Opcode::WriteStorageS16: + case IR::Opcode::LoadSharedU16: + case IR::Opcode::LoadSharedS16: + case IR::Opcode::WriteSharedU16: case IR::Opcode::SelectU16: case IR::Opcode::BitCastU16F16: case IR::Opcode::BitCastF16U16: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index e26047751..0276fc23b 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -18,6 +18,8 @@ enum class AttributeType : u8 { }; struct Profile { + u32 supported_spirv{0x00010000}; + bool unified_descriptor_binding{}; bool support_vertex_instance_id{}; bool support_float_controls{}; @@ -30,6 +32,7 @@ struct Profile { bool support_fp16_signed_zero_nan_preserve{}; bool support_fp32_signed_zero_nan_preserve{}; bool support_fp64_signed_zero_nan_preserve{}; + bool support_explicit_workgroup_layout{}; bool support_vote{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 69dd945b2..0d6a32bfd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -114,10 +114,12 @@ public: gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); const u64 num_texture_types{static_cast(texture_types.size())}; + const u32 local_memory_size{LocalMemorySize()}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -132,7 +134,10 @@ public: file.flush(); if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; - file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + const u32 shared_memory_size{SharedMemorySize()}; + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .write(reinterpret_cast(&shared_memory_size), + sizeof(shared_memory_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); } @@ -278,6 +283,16 @@ public: return maxwell3d->regs.tex_cb_index; } + u32 LocalMemorySize() const override { + const u64 size{sph.LocalMemorySize()}; + ASSERT(size <= std::numeric_limits::max()); + return static_cast(size); + } + + u32 SharedMemorySize() const override { + throw Shader::LogicError("Requesting shared memory size in graphics stage"); + } + std::array WorkgroupSize() const override { throw Shader::LogicError("Requesting workgroup size in a graphics stage"); } @@ -313,6 +328,16 @@ public: return kepler_compute->regs.tex_cb_index; } + u32 LocalMemorySize() const override { + const auto& qmd{kepler_compute->launch_description}; + return qmd.local_pos_alloc; + } + + u32 SharedMemorySize() const override { + const auto& qmd{kepler_compute->launch_description}; + return qmd.shared_alloc; + } + std::array WorkgroupSize() const override { const auto& qmd{kepler_compute->launch_description}; return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; @@ -366,6 +391,7 @@ public: u64 num_texture_types{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) @@ -381,7 +407,8 @@ public: texture_types.emplace(key, type); } if (stage == Shader::Stage::Compute) { - file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)); + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.read(reinterpret_cast(&sph), sizeof(sph)); } @@ -402,6 +429,14 @@ public: return it->second; } + u32 LocalMemorySize() const override { + return local_memory_size; + } + + u32 SharedMemorySize() const override { + return shared_memory_size; + } + u32 TextureBoundBuffer() const override { return texture_bound; } @@ -414,6 +449,8 @@ private: std::unique_ptr code; std::unordered_map texture_types; std::array workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; u32 texture_bound{}; u32 read_lowest{}; u32 read_highest{}; @@ -541,6 +578,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ + .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_vertex_instance_id = false, .support_float_controls = true, @@ -558,6 +596,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, .support_fp64_signed_zero_nan_preserve = float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, + .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, @@ -600,8 +639,8 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { shader = MakeShaderInfo(env, *cpu_shader_addr); } const ComputePipelineCacheKey key{ - .unique_hash = shader->unique_hash, - .shared_memory_size = qmd.shared_alloc, + .unique_hash{shader->unique_hash}, + .shared_memory_size{qmd.shared_alloc}, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 009b74f12..c027598ba 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -399,6 +399,20 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; + if (khr_workgroup_memory_explicit_layout) { + workgroup_layout = { + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, + .pNext = nullptr, + .workgroupMemoryExplicitLayout = VK_TRUE, + .workgroupMemoryExplicitLayoutScalarBlockLayout = VK_TRUE, + .workgroupMemoryExplicitLayout8BitAccess = VK_TRUE, + .workgroupMemoryExplicitLayout16BitAccess = VK_TRUE, + }; + SetNext(next, workgroup_layout); + } + if (!ext_depth_range_unrestricted) { LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); } @@ -662,6 +676,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { } bool has_khr_shader_float16_int8{}; + bool has_khr_workgroup_memory_explicit_layout{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; @@ -682,6 +697,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); + test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); @@ -694,6 +710,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_khr_workgroup_memory_explicit_layout, + VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -787,6 +805,22 @@ std::vector Device::LoadExtensions(bool requires_surface) { ext_extended_dynamic_state = true; } } + if (has_khr_workgroup_memory_explicit_layout) { + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; + layout.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR; + layout.pNext = nullptr; + features.pNext = &layout; + physical.GetFeatures2KHR(features); + + if (layout.workgroupMemoryExplicitLayout && + layout.workgroupMemoryExplicitLayout8BitAccess && + layout.workgroupMemoryExplicitLayout16BitAccess && + layout.workgroupMemoryExplicitLayoutScalarBlockLayout) { + extensions.push_back(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); + khr_workgroup_memory_explicit_layout = true; + } + } return extensions; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index c268a4f8d..ac2311e7e 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -168,11 +168,21 @@ public: return nv_viewport_swizzle; } - /// Returns true if the device supports VK_EXT_scalar_block_layout. + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; } + /// Returns true if the device supports VK_KHR_spirv_1_4. + bool IsKhrSpirv1_4Supported() const { + return khr_spirv_1_4; + } + + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. + bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { + return khr_workgroup_memory_explicit_layout; + } + /// Returns true if the device supports VK_EXT_index_type_uint8. bool IsExtIndexTypeUint8Supported() const { return ext_index_type_uint8; @@ -296,20 +306,22 @@ private: bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. - bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. - bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. - bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. - bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. - bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. - bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. - bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. - bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. - bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. - bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. - bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. - bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. - bool has_renderdoc{}; ///< Has RenderDoc attached - bool has_nsight_graphics{}; ///< Has Nsight Graphics attached + bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. + bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. + bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. + bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. + bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. + bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. + bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. + bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info. + bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control. + bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. + bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. + bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. + bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + bool has_renderdoc{}; ///< Has RenderDoc attached + bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Telemetry parameters std::string vendor_name; ///< Device's driver name. -- cgit v1.2.3 From 3c758d9b538e957a20ea6db136741ad2bd16406d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 28 Mar 2021 21:55:47 -0300 Subject: vk_pipeline_cache: Fix size hashing of shaders --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0d6a32bfd..8b2816c13 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -68,7 +68,7 @@ public: } cached_lowest = start_address; cached_highest = start_address + static_cast(*size); - return Common::CityHash128(reinterpret_cast(code.data()), code.size()); + return Common::CityHash128(reinterpret_cast(code.data()), *size); } void SetCachedSize(size_t size_bytes) { @@ -126,12 +126,10 @@ public: .write(reinterpret_cast(&read_highest), sizeof(read_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) .write(data.get(), code_size); - file.flush(); for (const auto [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } - file.flush(); if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; const u32 shared_memory_size{SharedMemorySize()}; @@ -141,7 +139,6 @@ public: } else { file.write(reinterpret_cast(&sph), sizeof(sph)); } - file.flush(); } protected: @@ -161,10 +158,10 @@ protected: code.resize(size / INST_SIZE); u64* const data = code.data() + offset / INST_SIZE; gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); - for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { - const u64 inst = data[i / INST_SIZE]; + for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { + const u64 inst = data[index / INST_SIZE]; if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { - return offset + i; + return offset + index; } } guest_addr += BLOCK_SIZE; @@ -751,7 +748,7 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { continue; } const auto program{static_cast(index)}; - GraphicsEnvironment& env{graphics_envs[index]}; + auto& env{graphics_envs[index]}; const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); @@ -771,6 +768,8 @@ ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheK const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + main_pools.ReleaseContents(); ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; if (!pipeline_cache_filename.empty()) { -- cgit v1.2.3 From 34aba9627a8fad20b3b173180e2f3d679dd32293 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 27 Mar 2021 22:30:24 +0100 Subject: shader: Implement BRX --- src/shader_recompiler/CMakeLists.txt | 4 + src/shader_recompiler/backend/spirv/emit_spirv.h | 3 + .../backend/spirv/emit_spirv_context_get_set.cpp | 10 +- .../backend/spirv/emit_spirv_control_flow.cpp | 4 + src/shader_recompiler/environment.h | 2 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 12 +++ src/shader_recompiler/frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + .../frontend/maxwell/control_flow.cpp | 58 +++++++++-- .../frontend/maxwell/control_flow.h | 7 +- .../maxwell/indirect_branch_table_track.cpp | 108 +++++++++++++++++++++ .../frontend/maxwell/indirect_branch_table_track.h | 28 ++++++ .../frontend/maxwell/instruction.h | 1 + .../frontend/maxwell/structured_control_flow.cpp | 57 +++++++++++ .../maxwell/translate/impl/branch_indirect.cpp | 36 +++++++ .../maxwell/translate/impl/load_constant.cpp | 29 +----- .../maxwell/translate/impl/load_constant.h | 39 ++++++++ .../maxwell/translate/impl/not_implemented.cpp | 8 -- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 21 +++- .../renderer_vulkan/vk_pipeline_cache.cpp | 50 +++++++++- 21 files changed, 437 insertions(+), 48 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 003cbefb1..44ab929b7 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -52,6 +52,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/control_flow.h frontend/maxwell/decode.cpp frontend/maxwell/decode.h + frontend/maxwell/indirect_branch_table_track.cpp + frontend/maxwell/indirect_branch_table_track.h frontend/maxwell/instruction.h frontend/maxwell/location.h frontend/maxwell/maxwell.inc @@ -63,6 +65,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/structured_control_flow.h frontend/maxwell/translate/impl/bitfield_extract.cpp frontend/maxwell/translate/impl/bitfield_insert.cpp + frontend/maxwell/translate/impl/branch_indirect.cpp frontend/maxwell/translate/impl/common_encoding.h frontend/maxwell/translate/impl/common_funcs.cpp frontend/maxwell/translate/impl/common_funcs.h @@ -110,6 +113,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp frontend/maxwell/translate/impl/load_constant.cpp + frontend/maxwell/translate/impl/load_constant.h frontend/maxwell/translate/impl/load_effective_address.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_local_shared.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 204c5f9e0..02648d769 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -26,6 +26,7 @@ void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id fal void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); +void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); @@ -35,6 +36,8 @@ void EmitGetPred(EmitContext& ctx); void EmitSetPred(EmitContext& ctx); void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 52dcef8a4..4a267b16c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,8 +6,6 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" -#pragma optimize("", off) - namespace Shader::Backend::SPIRV { namespace { struct AttrInfo { @@ -74,6 +72,14 @@ void EmitGetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +void EmitSetIndirectBranchVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitGetIndirectBranchVariable(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + static Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 6b81f0169..335603f88 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -26,6 +26,10 @@ void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } +void EmitUnreachable(EmitContext& ctx) { + ctx.OpUnreachable(); +} + void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label) { ctx.OpDemoteToHelperInvocationEXT(); ctx.OpBranch(continue_label); diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 9415d02f6..1c50ae51e 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -15,6 +15,8 @@ public: [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0; + [[nodiscard]] virtual TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) = 0; [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 9b898e4e1..552472487 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -87,6 +87,10 @@ void IREmitter::Return() { Inst(Opcode::Return); } +void IREmitter::Unreachable() { + Inst(Opcode::Unreachable); +} + void IREmitter::DemoteToHelperInvocation(Block* continue_label) { block->SetBranch(continue_label); continue_label->AddImmediatePredecessor(block); @@ -126,6 +130,14 @@ void IREmitter::SetGotoVariable(u32 id, const U1& value) { Inst(Opcode::SetGotoVariable, id, value); } +U32 IREmitter::GetIndirectBranchVariable() { + return Inst(Opcode::GetIndirectBranchVariable); +} + +void IREmitter::SetIndirectBranchVariable(const U32& value) { + Inst(Opcode::SetIndirectBranchVariable, value); +} + void IREmitter::SetPred(IR::Pred pred, const U1& value) { Inst(Opcode::SetPred, pred, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 269f367a4..17bc32fc8 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -37,6 +37,7 @@ public: void LoopMerge(Block* merge_block, Block* continue_target); void SelectionMerge(Block* merge_block); void Return(); + void Unreachable(); void DemoteToHelperInvocation(Block* continue_label); void Prologue(); @@ -51,6 +52,9 @@ public: [[nodiscard]] U1 GetGotoVariable(u32 id); void SetGotoVariable(u32 id, const U1& value); + [[nodiscard]] U32 GetIndirectBranchVariable(); + void SetIndirectBranchVariable(const U32& value); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 52a5e5034..c3ba6b522 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -55,6 +55,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Prologue: case Opcode::Epilogue: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9b050995b..fb79e3d8d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(Unreachable, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) // Special operations @@ -26,6 +27,8 @@ OPCODE(GetPred, U1, Pred OPCODE(SetPred, Void, Pred, U1, ) OPCODE(GetGotoVariable, U1, U32, ) OPCODE(SetGotoVariable, Void, U32, U1, ) +OPCODE(GetIndirectBranchVariable, U32, ) +OPCODE(SetIndirectBranchVariable, Void, U32, ) OPCODE(GetCbufU8, U32, U32, U32, ) OPCODE(GetCbufS8, U32, U32, U32, ) OPCODE(GetCbufU16, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 4f6707fae..1e9b8e426 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -14,6 +14,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" #include "shader_recompiler/frontend/maxwell/location.h" namespace Shader::Maxwell::Flow { @@ -252,9 +253,7 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati const Opcode opcode{Decode(inst.raw)}; switch (opcode) { case Opcode::BRA: - case Opcode::BRX: case Opcode::JMP: - case Opcode::JMX: case Opcode::RET: if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { return AnalysisState::Continue; @@ -264,10 +263,6 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati case Opcode::JMP: AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); break; - case Opcode::BRX: - case Opcode::JMX: - AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode)); - break; case Opcode::RET: block->end_class = EndClass::Return; break; @@ -302,6 +297,9 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati case Opcode::SSY: block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); return AnalysisState::Continue; + case Opcode::BRX: + case Opcode::JMX: + return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id); case Opcode::EXIT: return AnalyzeEXIT(block, function_id, pc, inst); case Opcode::PRET: @@ -407,8 +405,46 @@ void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruct block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); } -void CFG::AnalyzeBRX(Block*, Location, Instruction, bool is_absolute) { - throw NotImplementedException("{}", is_absolute ? "JMX" : "BRX"); +CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id) { + const std::optional brx_table{TrackIndirectBranchTable(env, pc, block->begin)}; + if (!brx_table) { + TrackIndirectBranchTable(env, pc, block->begin); + throw NotImplementedException("Failed to track indirect branch"); + } + const IR::FlowTest flow_test{inst.branch.flow_test}; + const Predicate pred{inst.Pred()}; + if (flow_test != IR::FlowTest::T || pred != Predicate{true}) { + throw NotImplementedException("Conditional indirect branch"); + } + std::vector targets; + targets.reserve(brx_table->num_entries); + for (u32 i = 0; i < brx_table->num_entries; ++i) { + u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)}; + if (!is_absolute) { + target += pc.Offset(); + } + target += brx_table->branch_offset; + target += 8; + targets.push_back(target); + } + std::ranges::sort(targets); + targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); + + block->indirect_branches.reserve(targets.size()); + for (const u32 target : targets) { + Block* const branch{AddLabel(block, block->stack, target, function_id)}; + block->indirect_branches.push_back(branch); + } + block->cond = IR::Condition{true}; + block->end = pc + 1; + block->end_class = EndClass::IndirectBranch; + block->branch_reg = brx_table->branch_reg; + block->branch_offset = brx_table->branch_offset + 8; + if (!is_absolute) { + block->branch_offset += pc.Offset(); + } + return AnalysisState::Branch; } CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, @@ -449,7 +485,6 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function // Block already exists and it has been visited return &*it; } - // TODO: FIX DANGLING BLOCKS Block* const new_block{block_pool.Create(Block{ .begin{pc}, .end{pc}, @@ -494,6 +529,11 @@ std::string CFG::Dot() const { add_branch(block.branch_false, false); } break; + case EndClass::IndirectBranch: + for (Block* const branch : block.indirect_branches) { + add_branch(branch, false); + } + break; case EndClass::Call: dot += fmt::format("\t\t{}->N{};\n", name, node_uid); dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 22f134194..1e05fcb97 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -26,6 +26,7 @@ using FunctionId = size_t; enum class EndClass { Branch, + IndirectBranch, Call, Exit, Return, @@ -76,11 +77,14 @@ struct Block : boost::intrusive::set_base_hook< union { Block* branch_true; FunctionId function_call; + IR::Reg branch_reg; }; union { Block* branch_false; Block* return_block; + s32 branch_offset; }; + std::vector indirect_branches; }; struct Label { @@ -139,7 +143,8 @@ private: void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, bool is_absolute); - void AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute); + AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, + FunctionId function_id); AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); /// Return the branch target block id diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp new file mode 100644 index 000000000..96453509d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -0,0 +1,108 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/decode.h" +#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" +#include "shader_recompiler/frontend/maxwell/opcodes.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" + +namespace Shader::Maxwell { +namespace { +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 19, u64> immediate; + BitField<56, 1, u64> is_negative; + BitField<20, 24, s64> brx_offset; +}; + +template +std::optional Track(Environment& env, Location block_begin, Location& pos, Callable&& func) { + while (pos >= block_begin) { + const u64 insn{env.ReadInstruction(pos.Offset())}; + --pos; + if (func(insn, Decode(insn))) { + return insn; + } + } + return std::nullopt; +} + +std::optional TrackLDC(Environment& env, Location block_begin, Location& pos, + IR::Reg brx_reg) { + return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) { + const LDC::Encoding ldc{insn}; + return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 && + ldc.mode == LDC::Mode::Default; + }); +} + +std::optional TrackSHL(Environment& env, Location block_begin, Location& pos, + IR::Reg ldc_reg) { + return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) { + const Encoding shl{insn}; + return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg; + }); +} + +std::optional TrackIMNMX(Environment& env, Location block_begin, Location& pos, + IR::Reg shl_reg) { + return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) { + const Encoding imnmx{insn}; + return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg; + }); +} +} // Anonymous namespace + +std::optional TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin) { + const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())}; + const Opcode brx_opcode{Decode(brx_insn)}; + if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) { + throw LogicError("Tracked instruction is not BRX or JMX"); + } + const IR::Reg brx_reg{Encoding{brx_insn}.src_reg}; + const s32 brx_offset{static_cast(Encoding{brx_insn}.brx_offset)}; + + Location pos{brx_pos}; + const std::optional ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)}; + if (!ldc_insn) { + return std::nullopt; + } + const LDC::Encoding ldc{*ldc_insn}; + const u32 cbuf_index{static_cast(ldc.index)}; + const u32 cbuf_offset{static_cast(static_cast(ldc.offset.Value()))}; + const IR::Reg ldc_reg{ldc.src_reg}; + + const std::optional shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)}; + if (!shl_insn) { + return std::nullopt; + } + const Encoding shl{*shl_insn}; + const IR::Reg shl_reg{shl.src_reg}; + + const std::optional imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)}; + if (!imnmx_insn) { + return std::nullopt; + } + const Encoding imnmx{*imnmx_insn}; + if (imnmx.is_negative != 0) { + return std::nullopt; + } + const u32 imnmx_immediate{static_cast(imnmx.immediate.Value())}; + return IndirectBranchTableInfo{ + .cbuf_index{cbuf_index}, + .cbuf_offset{cbuf_offset}, + .num_entries{imnmx_immediate + 1}, + .branch_offset{brx_offset}, + .branch_reg{brx_reg}, + }; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h new file mode 100644 index 000000000..eee5102fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/maxwell/location.h" + +namespace Shader::Maxwell { + +struct IndirectBranchTableInfo { + u32 cbuf_index{}; + u32 cbuf_offset{}; + u32 num_entries{}; + s32 branch_offset{}; + IR::Reg branch_reg{}; +}; + +std::optional TrackIndirectBranchTable(Environment& env, Location brx_pos, + Location block_begin); + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h index 57fd531f2..743d68d61 100644 --- a/src/shader_recompiler/frontend/maxwell/instruction.h +++ b/src/shader_recompiler/frontend/maxwell/instruction.h @@ -7,6 +7,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/ir/flow_test.h" +#include "shader_recompiler/frontend/ir/reg.h" namespace Shader::Maxwell { diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 9d4688390..a6e55f61e 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -17,6 +17,7 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/maxwell/decode.h" #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/object_pool.h" @@ -46,12 +47,15 @@ enum class StatementType { Break, Return, Kill, + Unreachable, Function, Identity, Not, Or, SetVariable, + SetIndirectBranchVariable, Variable, + IndirectBranchCond, }; bool HasChildren(StatementType type) { @@ -72,12 +76,15 @@ struct Loop {}; struct Break {}; struct Return {}; struct Kill {}; +struct Unreachable {}; struct FunctionTag {}; struct Identity {}; struct Not {}; struct Or {}; struct SetVariable {}; +struct SetIndirectBranchVariable {}; struct Variable {}; +struct IndirectBranchCond {}; #ifdef _MSC_VER #pragma warning(push) @@ -96,6 +103,7 @@ struct Statement : ListBaseHook { : cond{cond_}, up{up_}, type{StatementType::Break} {} Statement(Return) : type{StatementType::Return} {} Statement(Kill) : type{StatementType::Kill} {} + Statement(Unreachable) : type{StatementType::Unreachable} {} Statement(FunctionTag) : children{}, type{StatementType::Function} {} Statement(Identity, IR::Condition cond_) : guest_cond{cond_}, type{StatementType::Identity} {} Statement(Not, Statement* op_) : op{op_}, type{StatementType::Not} {} @@ -103,7 +111,12 @@ struct Statement : ListBaseHook { : op_a{op_a_}, op_b{op_b_}, type{StatementType::Or} {} Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} + Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_) + : branch_offset{branch_offset_}, + branch_reg{branch_reg_}, type{StatementType::SetIndirectBranchVariable} {} Statement(Variable, u32 id_) : id{id_}, type{StatementType::Variable} {} + Statement(IndirectBranchCond, u32 location_) + : location{location_}, type{StatementType::IndirectBranchCond} {} ~Statement() { if (HasChildren(type)) { @@ -118,11 +131,14 @@ struct Statement : ListBaseHook { IR::Condition guest_cond; Statement* op; Statement* op_a; + u32 location; + s32 branch_offset; }; union { Statement* cond; Statement* op_b; u32 id; + IR::Reg branch_reg; }; Statement* up{}; StatementType type; @@ -141,6 +157,8 @@ std::string DumpExpr(const Statement* stmt) { return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); case StatementType::Variable: return fmt::format("goto_L{}", stmt->id); + case StatementType::IndirectBranchCond: + return fmt::format("(indirect_branch == {:x})", stmt->location); default: return ""; } @@ -182,14 +200,22 @@ std::string DumpTree(const Tree& tree, u32 indentation = 0) { case StatementType::Kill: ret += fmt::format("{} kill;\n", indent); break; + case StatementType::Unreachable: + ret += fmt::format("{} unreachable;\n", indent); + break; case StatementType::SetVariable: ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); break; + case StatementType::SetIndirectBranchVariable: + ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg, + stmt->branch_offset); + break; case StatementType::Function: case StatementType::Identity: case StatementType::Not: case StatementType::Or: case StatementType::Variable: + case StatementType::IndirectBranchCond: throw LogicError("Statement can't be printed"); } } @@ -417,6 +443,17 @@ private: } break; } + case Flow::EndClass::IndirectBranch: + root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, + block.branch_offset)); + for (Flow::Block* const branch : block.indirect_branches) { + const Node indirect_label{local_labels.at(branch)}; + Statement* cond{pool.Create(IndirectBranchCond{}, branch->begin.Offset())}; + Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; + gotos.push_back(root.insert(ip, *goto_stmt)); + } + root.insert(ip, *pool.Create(Unreachable{})); + break; case Flow::EndClass::Call: { Flow::Function& call{cfg.Functions()[block.function_call]}; const Node call_return_label{local_labels.at(block.return_block)}; @@ -623,6 +660,8 @@ IR::Block* TryFindForwardBlock(const Statement& stmt) { return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); case StatementType::Variable: return ir.GetGotoVariable(stmt.id); + case StatementType::IndirectBranchCond: + return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location)); default: throw NotImplementedException("Statement type {}", stmt.type); } @@ -670,6 +709,15 @@ private: ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); break; } + case StatementType::SetIndirectBranchVariable: { + if (!current_block) { + current_block = MergeBlock(parent, stmt); + } + IR::IREmitter ir{*current_block}; + IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; + ir.SetIndirectBranchVariable(address); + break; + } case StatementType::If: { if (!current_block) { current_block = block_pool.Create(inst_pool); @@ -756,6 +804,15 @@ private: current_block = demote_block; break; } + case StatementType::Unreachable: { + if (!current_block) { + current_block = block_pool.Create(inst_pool); + block_list.push_back(current_block); + } + IR::IREmitter{*current_block}.Unreachable(); + current_block = nullptr; + break; + } default: throw NotImplementedException("Statement type {}", stmt.type); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp new file mode 100644 index 000000000..371c0e0f7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp @@ -0,0 +1,36 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void Check(u64 insn) { + union { + u64 raw; + BitField<5, 1, u64> cbuf_mode; + BitField<6, 1, u64> lmt; + } const encoding{insn}; + + if (encoding.cbuf_mode != 0) { + throw NotImplementedException("Constant buffer mode"); + } + if (encoding.lmt != 0) { + throw NotImplementedException("LMT"); + } +} +} // Anonymous namespace + +void TranslatorVisitor::BRX(u64 insn) { + Check(insn); +} + +void TranslatorVisitor::JMX(u64 insn) { + Check(insn); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index 39becf93c..49ccb7d62 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -5,25 +5,11 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" namespace Shader::Maxwell { +using namespace LDC; namespace { -enum class Mode : u64 { - Default, - IL, - IS, - ISL, -}; - -enum class Size : u64 { - U8, - S8, - U16, - S16, - B32, - B64, -}; - std::pair Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, const IR::U32& reg, const IR::U32& imm) { switch (mode) { @@ -37,16 +23,7 @@ std::pair Slot(IR::IREmitter& ir, Mode mode, const IR::U32& im } // Anonymous namespace void TranslatorVisitor::LDC(u64 insn) { - union { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<8, 8, IR::Reg> src_reg; - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - BitField<44, 2, Mode> mode; - BitField<48, 3, Size> size; - } const ldc{insn}; - + const Encoding ldc{insn}; const IR::U32 imm_index{ir.Imm32(static_cast(ldc.index))}; const IR::U32 reg{X(ldc.src_reg)}; const IR::U32 imm{ir.Imm32(static_cast(ldc.offset))}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new file mode 100644 index 000000000..3074ea0e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h @@ -0,0 +1,39 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/reg.h" + +namespace Shader::Maxwell::LDC { + +enum class Mode : u64 { + Default, + IL, + IS, + ISL, +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, +}; + +union Encoding { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<20, 16, s64> offset; + BitField<36, 5, u64> index; + BitField<44, 2, Mode> mode; + BitField<48, 3, Size> size; +}; + +} // namespace Shader::Maxwell::LDC diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index b62d8ee2a..a0057a473 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -53,10 +53,6 @@ void TranslatorVisitor::BRK(u64) { ThrowNotImplemented(Opcode::BRK); } -void TranslatorVisitor::BRX(u64) { - ThrowNotImplemented(Opcode::BRX); -} - void TranslatorVisitor::CAL() { // CAL is a no-op } @@ -181,10 +177,6 @@ void TranslatorVisitor::JMP(u64) { ThrowNotImplemented(Opcode::JMP); } -void TranslatorVisitor::JMX(u64) { - ThrowNotImplemented(Opcode::JMX); -} - void TranslatorVisitor::KIL() { // KIL is a no-op } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index bab7ca186..259233746 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -48,8 +48,12 @@ struct GotoVariable : FlagTag { u32 index; }; +struct IndirectBranchVariable { + auto operator<=>(const IndirectBranchVariable&) const noexcept = default; +}; + using Variant = std::variant; + OverflowFlagTag, GotoVariable, IndirectBranchVariable>; using ValueMap = boost::container::flat_map>; struct DefTable { @@ -65,6 +69,10 @@ struct DefTable { return goto_vars[goto_variable.index]; } + [[nodiscard]] ValueMap& operator[](IndirectBranchVariable) { + return indirect_branch_var; + } + [[nodiscard]] ValueMap& operator[](ZeroFlagTag) noexcept { return zero_flag; } @@ -84,6 +92,7 @@ struct DefTable { std::array regs; std::array preds; boost::container::flat_map goto_vars; + ValueMap indirect_branch_var; ValueMap zero_flag; ValueMap sign_flag; ValueMap carry_flag; @@ -102,6 +111,10 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { return IR::Opcode::UndefU1; } +IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { + return IR::Opcode::UndefU32; +} + [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { return inst.Opcode() == IR::Opcode::Phi; } @@ -219,6 +232,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetGotoVariable: pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; + case IR::Opcode::SetIndirectBranchVariable: + pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); + break; case IR::Opcode::SetZFlag: pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); break; @@ -244,6 +260,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetGotoVariable: inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); break; + case IR::Opcode::GetIndirectBranchVariable: + inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); + break; case IR::Opcode::GetZFlag: inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); break; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8b2816c13..6cde01491 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { } u64 MakeCbufKey(u32 index, u32 offset) { - return (static_cast(index) << 32) | static_cast(offset); + return (static_cast(index) << 32) | offset; } class GenericEnvironment : public Shader::Environment { @@ -114,11 +114,13 @@ public: gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); const u64 num_texture_types{static_cast(texture_types.size())}; + const u64 num_cbuf_values{static_cast(cbuf_values.size())}; const u32 local_memory_size{LocalMemorySize()}; const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) @@ -130,6 +132,10 @@ public: file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } if (stage == Shader::Stage::Compute) { const std::array workgroup_size{WorkgroupSize()}; const u32 shared_memory_size{SharedMemorySize()}; @@ -212,6 +218,7 @@ protected: std::vector code; std::unordered_map texture_types; + std::unordered_map cbuf_values; u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -267,6 +274,17 @@ public: ~GraphicsEnvironment() override = default; + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto& regs{maxwell3d->regs}; const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; @@ -312,6 +330,18 @@ public: ~ComputeEnvironment() override = default; + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; @@ -386,8 +416,10 @@ public: void Deserialize(std::ifstream& file) { u64 code_size{}; u64 num_texture_types{}; + u64 num_cbuf_values{}; file.read(reinterpret_cast(&code_size), sizeof(code_size)) .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .read(reinterpret_cast(&start_address), sizeof(start_address)) @@ -403,6 +435,13 @@ public: .read(reinterpret_cast(&type), sizeof(type)); texture_types.emplace(key, type); } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } if (stage == Shader::Stage::Compute) { file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); @@ -418,6 +457,14 @@ public: return code[(address - read_lowest) / sizeof(u64)]; } + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { + const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; + } + Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; if (it == texture_types.end()) { @@ -445,6 +492,7 @@ public: private: std::unique_ptr code; std::unordered_map texture_types; + std::unordered_map cbuf_values; std::array workgroup_size{}; u32 local_memory_size{}; u32 shared_memory_size{}; -- cgit v1.2.3 From 7a1c14269e20cffeed780f388c90a86f8bba1a92 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 30 Mar 2021 03:58:46 -0300 Subject: spirv: Add fixed pipeline point size --- src/shader_recompiler/backend/spirv/emit_context.cpp | 2 +- src/shader_recompiler/backend/spirv/emit_spirv_special.cpp | 4 ++++ src/shader_recompiler/profile.h | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 3 +++ 4 files changed, 11 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2c93bada5..5cd505d99 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -495,7 +495,7 @@ void EmitContext::DefineOutputs(const Info& info) { if (info.stores_position || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); } - if (info.stores_point_size) { + if (info.stores_point_size || profile.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in Fragment stage"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 44d2fde02..5f80c189f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -17,6 +17,10 @@ void EmitPrologue(EmitContext& ctx) { ctx.OpStore(generic_id, default_vector); } } + if (ctx.profile.fixed_state_point_size) { + const float point_size{*ctx.profile.fixed_state_point_size}; + ctx.OpStore(ctx.output_point_size, ctx.Constant(ctx.F32[1], point_size)); + } } } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 0276fc23b..f4b94896c 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include "common/common_types.h" @@ -41,6 +42,8 @@ struct Profile { std::array generic_input_types{}; bool convert_depth_mode{}; + + std::optional fixed_state_point_size; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6cde01491..eb4df9000 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -864,6 +864,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Profile profile{base_profile}; if (stage == Shader::Stage::VertexB) { profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + profile.fixed_state_point_size = Common::BitCast(key.state.point_size); + } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); } -- cgit v1.2.3 From dc1a9a3bed2aa9b0851f07976b0c687172aa3edc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 26 Mar 2021 20:51:05 +0100 Subject: shader: Implement TLD --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_spirv_image.cpp | 2 +- src/shader_recompiler/frontend/ir/opcodes.inc | 6 +- src/shader_recompiler/frontend/maxwell/maxwell.inc | 4 +- .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/texture_load.cpp | 165 +++++++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 1 + src/video_core/memory_manager.cpp | 3 +- 8 files changed, 174 insertions(+), 16 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 5ce420cbf..4b4c43ba8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -133,6 +133,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp frontend/maxwell/translate/impl/texture_gather_swizzled.cpp frontend/maxwell/translate/impl/texture_gather.cpp + frontend/maxwell/translate/impl/texture_load.cpp frontend/maxwell/translate/impl/texture_query.cpp frontend/maxwell/translate/impl/video_helper.cpp frontend/maxwell/translate/impl/video_helper.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 3ea0011aa..310cc7af7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -254,7 +254,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c const auto info{inst->Flags()}; const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], - Texture(ctx, index), coords, operands.Mask(), operands.Span()); + TextureImage(ctx, index), coords, operands.Mask(), operands.Span()); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 717aa71ca..302b8471d 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -378,7 +378,7 @@ OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(BindlessImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -387,7 +387,7 @@ OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(BoundImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -396,7 +396,7 @@ OPCODE(ImageSampleDrefImplicitLod, F32, U32, OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, U32, Opaque, U32, U32, ) +OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) // Warp operations diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc index d668dc1aa..b47fb9c2e 100644 --- a/src/shader_recompiler/frontend/maxwell/maxwell.inc +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc @@ -252,8 +252,8 @@ INST(SYNC, "SYNC", "1111 0000 1111 1---") INST(TEX, "TEX", "1100 0--- ---- ----") INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") INST(TEXS, "TEXS", "1101 -00- ---- ----") -INST(TLD, "TLD", "1101 1100 --11 1---") -INST(TLD_b, "TLD (b)", "1101 1101 --11 1---") +INST(TLD, "TLD", "1101 1100 ---- ----") +INST(TLD_b, "TLD (b)", "1101 1101 ---- ----") INST(TLD4, "TLD4", "1100 10-- ---- ----") INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6a580f831..60d61ec6e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -313,14 +313,6 @@ void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } -void TranslatorVisitor::TLD(u64) { - ThrowNotImplemented(Opcode::TLD); -} - -void TranslatorVisitor::TLD_b(u64) { - ThrowNotImplemented(Opcode::TLD_b); -} - void TranslatorVisitor::TLDS(u64) { ThrowNotImplemented(Opcode::TLDS); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp new file mode 100644 index 000000000..b4063fa6e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -0,0 +1,165 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { + +enum class TextureType : u64 { + _1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, + ARRAY_3D, + CUBE, + ARRAY_CUBE, +}; + +Shader::TextureType GetType(TextureType type, bool dc) { + switch (type) { + case TextureType::_1D: + return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + case TextureType::ARRAY_1D: + return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + case TextureType::_2D: + return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + case TextureType::ARRAY_2D: + return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + case TextureType::_3D: + return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + case TextureType::ARRAY_CUBE: + return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { + const auto read_array{ + [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; + switch (type) { + case TextureType::_1D: + return v.X(reg); + case TextureType::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); + case TextureType::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); + case TextureType::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_3D: + throw NotImplementedException("3D array texture type"); + case TextureType::CUBE: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); + case TextureType::ARRAY_CUBE: + return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array()); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { + const IR::U32 value{v.X(reg++)}; + switch (type) { + case TextureType::_1D: + case TextureType::ARRAY_1D: + return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); + case TextureType::_2D: + case TextureType::ARRAY_2D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); + case TextureType::_3D: + case TextureType::ARRAY_3D: + return v.ir.CompositeConstruct( + v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), + v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); + case TextureType::CUBE: + case TextureType::ARRAY_CUBE: + throw NotImplementedException("Illegal offset on CUBE sample"); + } + throw NotImplementedException("Invalid texture type {}", type); +} + +void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { + union { + u64 raw; + BitField<49, 1, u64> nodep; + BitField<55, 1, u64> lod; + BitField<50, 1, u64> multisample; + BitField<35, 1, u64> aoffi; + BitField<54, 1, u64> clamp; + BitField<51, 3, IR::Pred> sparse_pred; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<20, 8, IR::Reg> meta_reg; + BitField<28, 3, TextureType> type; + BitField<31, 4, u64> mask; + BitField<36, 13, u64> cbuf_offset; + } const tld{insn}; + + const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; + + IR::Reg meta_reg{tld.meta_reg}; + IR::Value handle; + IR::Value offset; + IR::U32 lod; + IR::U32 multisample; + if (!is_bindless) { + handle = v.ir.Imm32(static_cast(tld.cbuf_offset.Value() * 4)); + } else { + handle = v.X(meta_reg++); + } + if (tld.lod != 0) { + lod = v.X(meta_reg++); + } + if (tld.aoffi != 0) { + offset = MakeOffset(v, meta_reg, tld.type); + } + if (tld.multisample != 0) { + multisample = v.X(meta_reg++); + } + if (tld.clamp != 0) { + throw NotImplementedException("TLD.CL - CLAMP is not implmented"); + } + IR::TextureInstInfo info{}; + info.type.Assign(GetType(tld.type, false)); + const IR::Value sample{[&]() -> IR::Value { + return v.ir.ImageFetch(handle, coords, offset, lod, multisample, info); + }()}; + + IR::Reg dest_reg{tld.dest_reg}; + for (size_t element = 0; element < 4; ++element) { + if (((tld.mask >> element) & 1) == 0) { + continue; + } + v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); + ++dest_reg; + } + if (tld.sparse_pred != IR::Pred::PT) { + v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); + } +} +} // Anonymous namespace + +void TranslatorVisitor::TLD(u64 insn) { + Impl(*this, insn, false); +} + +void TranslatorVisitor::TLD_b(u64 insn) { + Impl(*this, insn, true); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index eb3d1343f..3b00d7c8c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -382,6 +382,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageSampleDrefExplicitLod: case IR::Opcode::ImageGather: case IR::Opcode::ImageGatherDref: + case IR::Opcode::ImageFetch: case IR::Opcode::ImageQueryDimensions: { const TextureType type{inst.Flags().type}; info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index d2b9d5f2b..05e27c687 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -64,12 +64,11 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); if (it != map_ranges.end()) { - ASSERT(it->first == gpu_addr); + // ASSERT(it->first == gpu_addr); map_ranges.erase(it); } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); } - const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); for (const auto& map : submapped_ranges) { -- cgit v1.2.3 From 4d0d29fc2092bf02e102b8bac9cfa1b509274901 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Tue, 30 Mar 2021 08:41:21 +0200 Subject: shader: Address feedback --- .../backend/spirv/emit_spirv_image.cpp | 30 +++++++++---------- .../maxwell/translate/impl/texture_gradient.cpp | 34 ++++++++++++---------- .../maxwell/translate/impl/texture_load.cpp | 10 +++---- .../translate/impl/texture_mipmap_level.cpp | 26 +++++++---------- src/video_core/memory_manager.cpp | 2 +- 5 files changed, 49 insertions(+), 53 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 1eba9cc00..03d2ec73e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -72,20 +72,19 @@ public: explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, Id offset, Id lod_clamp) { if (Sirit::ValidId(derivates)) { - boost::container::static_vector deriv_x_accum; - boost::container::static_vector deriv_y_accum; - for (size_t i = 0; i < num_derivates; i++) { - deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); - deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); - } - Id derivates_X = ctx.OpCompositeConstruct( - ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()}); - Id derivates_Y = ctx.OpCompositeConstruct( - ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()}); - Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); - } else { throw LogicError("Derivates must be present"); } + boost::container::static_vector deriv_x_accum; + boost::container::static_vector deriv_y_accum; + for (size_t i = 0; i < num_derivates; i++) { + deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); + deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); + } + const Id derivates_X{ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; + const Id derivates_Y{ctx.OpCompositeConstruct( + ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; + Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); if (Sirit::ValidId(offset)) { Add(spv::ImageOperandsMask::Offset, offset); } @@ -100,10 +99,10 @@ public: operands.push_back(value); } - void Add(spv::ImageOperandsMask new_mask, Id value, Id value_2) { + void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) { mask = static_cast(static_cast(mask) | static_cast(new_mask)); - operands.push_back(value); + operands.push_back(value_1); operands.push_back(value_2); } @@ -345,7 +344,8 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst*, const IR::Value& index, Id coo Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivates, Id offset, Id lod_clamp) { const auto info{inst->Flags()}; - const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset, lod_clamp); + const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, + offset, lod_clamp); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, operands.Mask(), operands.Span()); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp index 00768e167..c66468a48 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -47,7 +47,7 @@ Shader::TextureType GetType(TextureType type, bool dc) { IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { const IR::U32 value{v.X(reg)}; - const u32 base = has_lod_clamp ? 12 : 16; + const u32 base{has_lod_clamp ? 12U : 16U}; return v.ir.CompositeConstruct( v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); @@ -74,20 +74,21 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::Value coords; - u32 num_derivates; - IR::Reg base_reg = txd.coord_reg; + u32 num_derivates{}; + IR::Reg base_reg{txd.coord_reg}; IR::Reg last_reg; IR::Value handle; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast(txd.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(base_reg++); + } else { + handle = v.ir.Imm32(static_cast(txd.cbuf_offset.Value() * 4)); } const auto read_array{[&]() -> IR::F32 { - return v.ir.ConvertUToF(32, 16, - v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(0), - v.ir.Imm32(has_lod_clamp ? 12 : 16))); + const IR::U32 base{v.ir.Imm32(0)}; + const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)}; + const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; + return v.ir.ConvertUToF(32, 16, array_index); }}; switch (txd.type) { case TextureType::_1D: { @@ -141,19 +142,20 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { IR::F32 lod_clamp; if (has_lod_clamp) { - const IR::F32 conv4_8fixp_f = v.ir.Imm32(Common::BitCast(0x3b800000U)); - const IR::F32 tmp = v.ir.ConvertUToF( - 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12))); - lod_clamp = v.ir.FPMul(tmp, conv4_8fixp_f); + // Lod Clamp is a Fixed Point 4.8, we need to transform it to float. + // to convert a fixed point, float(value) / float(1 << fixed_point) + // in this case the fixed_point is 8. + const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast(1U << 8))}; + const IR::F32 fixp_lc{v.ir.ConvertUToF( + 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; + lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); } IR::TextureInstInfo info{}; info.type.Assign(GetType(txd.type, false)); info.num_derivates.Assign(num_derivates); info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); - const IR::Value sample{[&]() -> IR::Value { - return v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info); - }()}; + const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; IR::Reg dest_reg{txd.dest_reg}; for (size_t element = 0; element < 4; ++element) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index df38f87a3..987b7ec34 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -117,10 +117,10 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { IR::Value offset; IR::U32 lod; IR::U32 multisample; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast(tld.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast(tld.cbuf_offset.Value() * 4)); } if (tld.lod != 0) { lod = v.X(meta_reg++); @@ -138,9 +138,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::TextureInstInfo info{}; info.type.Assign(GetType(tld.type, false)); - const IR::Value sample{[&]() -> IR::Value { - return v.ir.ImageFetch(handle, coords, offset, lod, multisample, info); - }()}; + const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; IR::Reg dest_reg{tld.dest_reg}; for (size_t element = 0; element < 4; ++element) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index ee13ede30..b6efc04f0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -81,39 +81,35 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { BitField<36, 13, u64> cbuf_offset; } const tmml{insn}; - if ((tmml.mask & 0xC) != 0) { + if ((tmml.mask & 0b1100) != 0) { throw NotImplementedException("TMML BA results are not implmented"); } - IR::F32 transform_constant = v.ir.Imm32(256.0f); + IR::F32 transform_constant{v.ir.Imm32(256.0f)}; const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; IR::U32 handle; IR::Reg meta_reg{tmml.meta_reg}; - if (!is_bindless) { - handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); - } else { + if (is_bindless) { handle = v.X(meta_reg++); + } else { + handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); } IR::TextureInstInfo info{}; info.type.Assign(GetType(tmml.type, false)); - const IR::Value sample{ - [&]() -> IR::Value { return v.ir.ImageQueryLod(handle, coords, info); }()}; + const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; - const IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RP}, - .fmz_mode{IR::FmzMode::FTZ}, - }; IR::Reg dest_reg{tmml.dest_reg}; for (size_t element = 0; element < 4; ++element) { if (((tmml.mask >> element) & 1) == 0) { continue; } - IR::F32 value = IR::F32{v.ir.CompositeExtract(sample, element)}; - v.F(dest_reg, - element < 2 ? IR::F32{v.ir.FPMul(value, transform_constant, fp_control)} : value); + IR::F32 value{v.ir.CompositeExtract(sample, element)}; + if (element < 2) { + value = v.ir.FPMul(value, transform_constant); + } + v.F(dest_reg, value); ++dest_reg; } } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 05e27c687..882eff880 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -64,7 +64,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); if (it != map_ranges.end()) { - // ASSERT(it->first == gpu_addr); + ASSERT(it->first == gpu_addr); map_ranges.erase(it); } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); -- cgit v1.2.3 From 0c933e20dec02e12a4644281b9b7bf9716a5cbb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 30 Mar 2021 21:28:00 -0300 Subject: vk_pipeline_cache: Name SPIR-V modules --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index eb4df9000..30a707599 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -777,6 +777,11 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, const Shader::Profile profile{MakeProfile(key, program.stage)}; const std::vector code{EmitSPIRV(profile, program, binding)}; modules[stage_index] = BuildShader(device, code); + if (device.HasDebuggingToolAttached()) { + const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], + key.unique_hashes[index][1])}; + modules[stage_index].SetObjectNameEXT(name.c_str()); + } } return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, @@ -836,8 +841,13 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; std::vector code{EmitSPIRV(base_profile, program, binding)}; + vk::ShaderModule spv_module{BuildShader(device, code)}; + if (device.HasDebuggingToolAttached()) { + const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; + spv_module.SetObjectNameEXT(name.c_str()); + } return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, - BuildShader(device, code)}; + std::move(spv_module)}; } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { -- cgit v1.2.3 From 2fc698b040e7e25223ba6ebe31abb04b1fc65f06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 01:36:22 -0300 Subject: vulkan: Build pipelines in parallel at runtime Wait from the worker thread for a pipeline to build before binding it to the command buffer. This allows queueing pipelines to multiple threads. --- .../renderer_vulkan/vk_compute_pipeline.cpp | 95 +++++++++++++--------- .../renderer_vulkan/vk_compute_pipeline.h | 30 +++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 72 +++++++++------- .../renderer_vulkan/vk_graphics_pipeline.h | 31 +++---- .../renderer_vulkan/vk_pipeline_cache.cpp | 78 +++++++++--------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 24 ++++-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 15 +--- src/video_core/renderer_vulkan/vk_scheduler.cpp | 10 +-- src/video_core/renderer_vulkan/vk_scheduler.h | 7 +- 9 files changed, 197 insertions(+), 165 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8e544d745..1c3249e3c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -27,8 +27,9 @@ DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& inf ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, - const Shader::Info& info_, vk::ShaderModule spv_module_) - : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, + Common::ThreadWorker* thread_worker, const Shader::Info& info_, + vk::ShaderModule spv_module_) + : update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { DescriptorLayoutTuple tuple{CreateLayout(device, info)}; descriptor_set_layout = std::move(tuple.descriptor_set_layout); @@ -36,46 +37,55 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - pipeline = device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + auto func{[this, &device] { + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }); + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); + building_flag.test_and_set(); + building_flag.notify_all(); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(); + } } -void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { +void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, TextureCache& texture_cache) { + update_descriptor_queue.Acquire(); + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); - size_t index{}; + size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true); - ++index; + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, true); + ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); -} -void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, - TextureCache& texture_cache) { texture_cache.SynchronizeComputeDescriptors(); static constexpr size_t max_elements = 64; @@ -103,15 +113,26 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); - size_t index{}; + size_t image_index{}; PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, - *update_descriptor_queue, index); -} + update_descriptor_queue, image_index); -VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { + if (!building_flag.test()) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + } + scheduler.Record([this](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + }); + if (!descriptor_set_layout) { + return; + } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); - return descriptor_set; + update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, nullptr); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index e82e5816b..02da504f7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,7 +4,10 @@ #pragma once +#include + #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" @@ -16,36 +19,26 @@ namespace Vulkan { class Device; +class VKScheduler; class ComputePipeline { public: - explicit ComputePipeline() = default; explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, - const Shader::Info& info, vk::ShaderModule spv_module); + Common::ThreadWorker* thread_worker, const Shader::Info& info, + vk::ShaderModule spv_module); - ComputePipeline& operator=(ComputePipeline&&) noexcept = default; - ComputePipeline(ComputePipeline&&) noexcept = default; + ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; + ComputePipeline(ComputePipeline&&) noexcept = delete; ComputePipeline& operator=(const ComputePipeline&) = delete; ComputePipeline(const ComputePipeline&) = delete; - void ConfigureBufferCache(BufferCache& buffer_cache); - void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache); - - [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); - - [[nodiscard]] VkPipeline Handle() const noexcept { - return *pipeline; - } - - [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept { - return *pipeline_layout; - } + void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); private: - VKUpdateDescriptorQueue* update_descriptor_queue; + VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; vk::ShaderModule spv_module; @@ -54,6 +47,7 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + std::atomic_flag building_flag{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 278509bf0..ddc08b8c4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -112,13 +112,15 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, BufferCache& buffer_cache_, TextureCache& texture_cache_, const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, - const FixedPipelineState& state, + const FixedPipelineState& state_, std::array stages, const std::array& infos) - : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, - buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, - update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { + : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, scheduler{scheduler_}, + update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{ + std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); @@ -128,8 +130,17 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; - MakePipeline(device, state, render_pass); + auto func{[this, &device, &render_pass_cache] { + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + MakePipeline(device, render_pass); + building_flag.test_and_set(); + building_flag.notify_all(); + }}; + if (worker_thread) { + worker_thread->QueueWork(std::move(func)); + } else { + func(); + } } void GraphicsPipeline::Configure(bool is_indexed) { @@ -138,67 +149,72 @@ void GraphicsPipeline::Configure(bool is_indexed) { static_vector image_view_indices; static_vector samplers; - texture_cache->SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeGraphicsDescriptors(); - const auto& regs{maxwell3d->regs}; + const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { const Shader::Info& info{stage_infos[stage]}; - buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); - buffer_cache->UnbindGraphicsStorageBuffers(stage); + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - true); + buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, + true); ++index; } - const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; for (const auto& desc : info.texture_descriptors) { const u32 cbuf_index{desc.cbuf_index}; const u32 cbuf_offset{desc.cbuf_offset}; ASSERT(cbufs[cbuf_index].enabled); const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; - const u32 raw_handle{gpu_memory->Read(addr)}; + const u32 raw_handle{gpu_memory.Read(addr)}; const TextureHandle handle(raw_handle, via_header_index); image_view_indices.push_back(handle.image); - Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache->UpdateGraphicsBuffers(is_indexed); - texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); + buffer_cache.UpdateGraphicsBuffers(is_indexed); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - buffer_cache->BindHostGeometryBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); size_t index{}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - buffer_cache->BindHostStageBuffers(stage); + buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), - *texture_cache, *update_descriptor_queue, index); + texture_cache, update_descriptor_queue, index); } - texture_cache->UpdateRenderTargets(false); - scheduler->RequestRenderpass(texture_cache->GetFramebuffer()); - - scheduler->BindGraphicsPipeline(*pipeline); + texture_cache.UpdateRenderTargets(false); + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + if (!building_flag.test()) { + scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + } + if (scheduler.UpdateGraphicsPipeline(this)) { + scheduler.Record([this](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + }); + } if (!descriptor_set_layout) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); - scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + scheduler.Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); }); } -void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, - VkRenderPass render_pass) { +void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { dynamic = state.dynamic_state; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ba1d34a83..4e0583157 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -5,13 +5,15 @@ #pragma once #include +#include +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -25,34 +27,34 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline() = default; explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, - BufferCache& buffer_cache, - TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool, + BufferCache& buffer_cache, TextureCache& texture_cache, + const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, const FixedPipelineState& state, std::array stages, const std::array& infos); void Configure(bool is_indexed); - GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; - GraphicsPipeline(GraphicsPipeline&&) noexcept = default; + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; + GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; private: - void MakePipeline(const Device& device, const FixedPipelineState& state, - VkRenderPass render_pass); + void MakePipeline(const Device& device, VkRenderPass render_pass); - Tegra::Engines::Maxwell3D* maxwell3d{}; - Tegra::MemoryManager* gpu_memory{}; - TextureCache* texture_cache{}; - BufferCache* buffer_cache{}; - VKScheduler* scheduler{}; - VKUpdateDescriptorQueue* update_descriptor_queue{}; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::MemoryManager& gpu_memory; + TextureCache& texture_cache; + BufferCache& buffer_cache; + VKScheduler& scheduler; + VKUpdateDescriptorQueue& update_descriptor_queue; + const FixedPipelineState state; std::array spv_modules; std::array stage_infos; @@ -61,6 +63,7 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + std::atomic_flag building_flag{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30a707599..e3d9debf4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -518,9 +518,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); - Common::ThreadWorker worker(11, "PipelineBuilder"); - std::mutex cache_mutex; struct { + std::mutex mutex; size_t total{0}; size_t built{0}; bool has_loaded{false}; @@ -542,51 +541,53 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } u32 num_envs{}; file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); - auto envs{std::make_shared>(num_envs)}; - for (FileEnvironment& env : *envs) { + std::vector envs(num_envs); + for (FileEnvironment& env : envs) { env.Deserialize(file); } - if (envs->front().ShaderStage() == Shader::Stage::Compute) { + if (envs.front().ShaderStage() == Shader::Stage::Compute) { ComputePipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); - worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; - ComputePipeline pipeline{CreateComputePipeline(pools, key, envs->front())}; + auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; compute_cache.emplace(key, std::move(pipeline)); + ++state.built; if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } else { GraphicsPipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); - worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; boost::container::static_vector env_ptrs; - for (auto& env : *envs) { + for (auto& env : envs) { env_ptrs.push_back(&env); } - GraphicsPipeline pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs))}; + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } ++state.total; } { - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); state.has_loaded = true; } - worker.WaitForRequests(); + workers.WaitForRequests(); } size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -619,7 +620,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(11, "PipelineBuilder") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ @@ -662,10 +663,10 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; if (!is_new) { - return &pipeline; + return pipeline.get(); } pipeline = CreateGraphicsPipeline(); - return &pipeline; + return pipeline.get(); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -691,10 +692,10 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { const auto [pair, is_new]{compute_cache.try_emplace(key)}; auto& pipeline{pair->second}; if (!is_new) { - return &pipeline; + return pipeline.get(); } pipeline = CreateComputePipeline(key, shader); - return &pipeline; + return pipeline.get(); } bool PipelineCache::RefreshStages() { @@ -743,9 +744,9 @@ const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr c return result; } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, - const GraphicsPipelineCacheKey& key, - std::span envs) { +std::unique_ptr PipelineCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs, bool build_in_parallel) { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; @@ -783,12 +784,14 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, modules[stage_index].SetObjectNameEXT(name.c_str()); } } - return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, - descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, - std::move(modules), infos); + Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; + return std::make_unique( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, + update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules), + infos); } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { +std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); std::array graphics_envs; @@ -809,22 +812,22 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { generic_envs.push_back(&env); envs.push_back(&env); } - GraphicsPipeline pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs))}; + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; if (!pipeline_cache_filename.empty()) { SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); } return pipeline; } -ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheKey& key, - const ShaderInfo* shader) { +std::unique_ptr PipelineCache::CreateComputePipeline( + const ComputePipelineCacheKey& key, const ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; + auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { SerializePipeline(key, std::array{&env}, pipeline_cache_filename); @@ -832,9 +835,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheK return pipeline; } -ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, - const ComputePipelineCacheKey& key, - Shader::Environment& env) const { +std::unique_ptr PipelineCache::CreateComputePipeline( + ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, + bool build_in_parallel) { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -846,8 +849,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; spv_module.SetObjectNameEXT(name.c_str()); } - return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, - std::move(spv_module)}; + Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; + return std::make_unique(device, descriptor_pool, update_descriptor_queue, + thread_worker, program.info, std::move(spv_module)); } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index b55e14189..609f00898 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -14,6 +14,7 @@ #include #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -145,16 +146,19 @@ private: const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); - GraphicsPipeline CreateGraphicsPipeline(); + std::unique_ptr CreateGraphicsPipeline(); - GraphicsPipeline CreateGraphicsPipeline(ShaderPools& pools, const GraphicsPipelineCacheKey& key, - std::span envs); + std::unique_ptr CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs, bool build_in_parallel); - ComputePipeline CreateComputePipeline(const ComputePipelineCacheKey& key, - const ShaderInfo* shader); + std::unique_ptr CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); - ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, - Shader::Environment& env) const; + std::unique_ptr CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env, + bool build_in_parallel); Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); @@ -174,13 +178,15 @@ private: GraphicsPipelineCacheKey graphics_key{}; std::array shader_infos{}; - std::unordered_map compute_cache; - std::unordered_map graphics_cache; + std::unordered_map> compute_cache; + std::unordered_map> graphics_cache; ShaderPools main_pools; Shader::Profile base_profile; std::string pipeline_cache_filename; + + Common::ThreadWorker workers; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d7d9927dd..f0bd4b8af 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -276,22 +276,11 @@ void RasterizerVulkan::DispatchCompute() { return; } std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; - update_descriptor_queue.Acquire(); - pipeline->ConfigureBufferCache(buffer_cache); - pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache); - const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; + pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); const auto& qmd{kepler_compute.launch_description}; const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; - const VkPipeline pipeline_handle{pipeline->Handle()}; - const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()}; - scheduler.Record( - [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, - descriptor_set, nullptr); - cmdbuf.Dispatch(dim[0], dim[1], dim[2]); - }); + scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f35c120b0..25a4933e5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -124,18 +124,16 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { EndRenderPass(); } -void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { +bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { if (state.graphics_pipeline == pipeline) { - return; + return false; } state.graphics_pipeline = pipeline; - Record([pipeline](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - }); + return true; } void VKScheduler::WorkerThread() { - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + Common::SetCurrentThreadName("yuzu:VulkanWorker"); std::unique_lock lock{mutex}; do { cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3ce48e9d2..a40bb8bcd 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -22,6 +22,7 @@ namespace Vulkan { class CommandPool; class Device; class Framebuffer; +class GraphicsPipeline; class StateTracker; class VKQueryCache; @@ -52,8 +53,8 @@ public: /// of a renderpass. void RequestOutsideRenderPassOperationContext(); - /// Binds a pipeline to the current execution context. - void BindGraphicsPipeline(VkPipeline pipeline); + /// Update the pipeline to the current execution context. + bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); /// Invalidates current command buffer state except for render passes void InvalidateState(); @@ -170,7 +171,7 @@ private: VkRenderPass renderpass = nullptr; VkFramebuffer framebuffer = nullptr; VkExtent2D render_area = {0, 0}; - VkPipeline graphics_pipeline = nullptr; + GraphicsPipeline* graphics_pipeline = nullptr; }; void WorkerThread(); -- cgit v1.2.3 From 8771639d1e97cf2224657c0d2ee87d800a784ac8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 03:15:28 -0300 Subject: vulkan: Create pipeline layouts in separate threads --- src/video_core/renderer_vulkan/pipeline_helper.h | 72 ++++++++++++---------- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 19 +++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 26 ++++---- .../renderer_vulkan/vk_pipeline_cache.cpp | 3 +- .../renderer_vulkan/vk_update_descriptor.cpp | 4 +- .../renderer_vulkan/vk_update_descriptor.h | 2 +- 7 files changed, 65 insertions(+), 63 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 0a59aa659..eebe5d569 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -35,49 +35,52 @@ struct TextureHandle { u32 sampler; }; -struct DescriptorLayoutTuple { - vk::DescriptorSetLayout descriptor_set_layout; - vk::PipelineLayout pipeline_layout; - vk::DescriptorUpdateTemplateKHR descriptor_update_template; -}; - class DescriptorLayoutBuilder { public: - DescriptorLayoutTuple Create(const vk::Device& device) { - DescriptorLayoutTuple result; - if (!bindings.empty()) { - result.descriptor_set_layout = device.CreateDescriptorSetLayout({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }); + DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} + + vk::DescriptorSetLayout CreateDescriptorSetLayout() const { + if (bindings.empty()) { + return nullptr; } - result.pipeline_layout = device.CreatePipelineLayout({ + return device->CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); + } + + vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout) const { + if (entries.empty()) { + return nullptr; + } + return device->CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = pipeline_layout, + .set = 0, + }); + } + + vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { + return device->CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, - .setLayoutCount = result.descriptor_set_layout ? 1U : 0U, - .pSetLayouts = bindings.empty() ? nullptr : result.descriptor_set_layout.address(), + .setLayoutCount = descriptor_set_layout ? 1U : 0U, + .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout, .pushConstantRangeCount = 0, .pPushConstantRanges = nullptr, }); - if (!entries.empty()) { - result.descriptor_update_template = device.CreateDescriptorUpdateTemplateKHR({ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, - .pNext = nullptr, - .flags = 0, - .descriptorUpdateEntryCount = static_cast(entries.size()), - .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, - .descriptorSetLayout = *result.descriptor_set_layout, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .pipelineLayout = *result.pipeline_layout, - .set = 0, - }); - } - return result; } void Add(const Shader::Info& info, VkShaderStageFlags stage) { @@ -113,6 +116,7 @@ private: offset += sizeof(DescriptorUpdateEntry); } + const vk::Device* device{}; boost::container::small_vector bindings; boost::container::small_vector entries; u32 binding{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index a444d55d3..760857839 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -237,7 +237,7 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( return nullptr; } const VkDescriptorSet set = descriptor_allocator->Commit(); - update_descriptor_queue.Send(*descriptor_template, set); + update_descriptor_queue.Send(descriptor_template.address(), set); return set; } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 1c3249e3c..fb19bb4b9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -17,13 +17,6 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -namespace { -DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& info) { - DescriptorLayoutBuilder builder; - builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - return builder.Create(device.GetLogical()); -} -} // Anonymous namespace ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, @@ -31,10 +24,12 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip vk::ShaderModule spv_module_) : update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { - DescriptorLayoutTuple tuple{CreateLayout(device, info)}; - descriptor_set_layout = std::move(tuple.descriptor_set_layout); - pipeline_layout = std::move(tuple.pipeline_layout); - descriptor_update_template = std::move(tuple.descriptor_update_template); + DescriptorLayoutBuilder builder{device.GetLogical()}; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); + descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); auto func{[this, &device] { @@ -128,7 +123,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, descriptor_set, nullptr); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ddc08b8c4..d17b79e02 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -27,8 +27,8 @@ using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -DescriptorLayoutTuple CreateLayout(const Device& device, std::span infos) { - DescriptorLayoutBuilder builder; +DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { + DescriptorLayoutBuilder builder{device.GetLogical()}; for (size_t index = 0; index < infos.size(); ++index) { static constexpr std::array stages{ VK_SHADER_STAGE_VERTEX_BIT, @@ -39,7 +39,7 @@ DescriptorLayoutTuple CreateLayout(const Device& device, std::span @@ -124,13 +124,15 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - DescriptorLayoutTuple tuple{CreateLayout(device, stage_infos)}; - descriptor_set_layout = std::move(tuple.descriptor_set_layout); - pipeline_layout = std::move(tuple.pipeline_layout); - descriptor_update_template = std::move(tuple.descriptor_update_template); + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; + descriptor_set_layout = builder.CreateDescriptorSetLayout(); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - auto func{[this, &device, &render_pass_cache] { + auto func{[this, &device, &render_pass_cache, builder] { + const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; + pipeline_layout = builder.CreatePipelineLayout(set_layout); + descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; MakePipeline(device, render_pass); building_flag.test_and_set(); @@ -206,11 +208,11 @@ void GraphicsPipeline::Configure(bool is_indexed) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); - scheduler.Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, - nullptr); + scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); }); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e3d9debf4..597261964 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -620,7 +620,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(11, "PipelineBuilder") { + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + workers(11, "yuzu:PipelineBuilder") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index dc45fdcb1..bea9b8012 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -36,12 +36,12 @@ void VKUpdateDescriptorQueue::Acquire() { upload_start = payload_cursor; } -void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, +void VKUpdateDescriptorQueue::Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set) { const void* const data = upload_start; const vk::Device* const logical = &device.GetLogical(); scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { - logical->UpdateDescriptorSet(set, update_template, data); + logical->UpdateDescriptorSet(set, *update_template, data); }); } diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index d35e77c44..82bc9920c 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -39,7 +39,7 @@ public: void Acquire(); - void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); + void Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set); void AddSampledImage(VkImageView image_view, VkSampler sampler) { *(payload_cursor++) = VkDescriptorImageInfo{ -- cgit v1.2.3 From d0a529683a2e5a693b53c6f24f6816c06f8f7e65 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 04:09:09 -0300 Subject: vulkan: Serialize pipelines on a separate thread --- .../renderer_vulkan/vk_pipeline_cache.cpp | 130 ++++++++++----------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 1 + 2 files changed, 64 insertions(+), 67 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 597261964..79cd204c7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -61,6 +61,33 @@ public: ~GenericEnvironment() override = default; + u32 TextureBoundBuffer() const final { + return texture_bound; + } + + u32 LocalMemorySize() const final { + return local_memory_size; + } + + u32 SharedMemorySize() const final { + return shared_memory_size; + } + + std::array WorkgroupSize() const final { + return workgroup_size; + } + + u64 ReadInstruction(u32 address) final { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[(address - cached_lowest) / INST_SIZE]; + } + has_unbound_instructions = true; + return gpu_memory->Read(program_base + address); + } + std::optional Analyze() { const std::optional size{TryFindSize()}; if (!size) { @@ -97,26 +124,10 @@ public: return Common::CityHash128(data.get(), size); } - u64 ReadInstruction(u32 address) final { - read_lowest = std::min(read_lowest, address); - read_highest = std::max(read_highest, address); - - if (address >= cached_lowest && address < cached_highest) { - return code[(address - cached_lowest) / INST_SIZE]; - } - has_unbound_instructions = true; - return gpu_memory->Read(program_base + address); - } - void Serialize(std::ofstream& file) const { - const u64 code_size{static_cast(ReadSize())}; - const auto data{std::make_unique(code_size)}; - gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); - + const u64 code_size{static_cast(CachedSize())}; const u64 num_texture_types{static_cast(texture_types.size())}; const u64 num_cbuf_values{static_cast(cbuf_values.size())}; - const u32 local_memory_size{LocalMemorySize()}; - const u32 texture_bound{TextureBoundBuffer()}; file.write(reinterpret_cast(&code_size), sizeof(code_size)) .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) @@ -124,10 +135,10 @@ public: .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) .write(reinterpret_cast(&start_address), sizeof(start_address)) - .write(reinterpret_cast(&read_lowest), sizeof(read_lowest)) - .write(reinterpret_cast(&read_highest), sizeof(read_highest)) + .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) + .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) .write(reinterpret_cast(&stage), sizeof(stage)) - .write(data.get(), code_size); + .write(reinterpret_cast(code.data()), code_size); for (const auto [key, type] : texture_types) { file.write(reinterpret_cast(&key), sizeof(key)) .write(reinterpret_cast(&type), sizeof(type)); @@ -137,8 +148,6 @@ public: .write(reinterpret_cast(&type), sizeof(type)); } if (stage == Shader::Stage::Compute) { - const std::array workgroup_size{WorkgroupSize()}; - const u32 shared_memory_size{SharedMemorySize()}; file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); @@ -220,6 +229,11 @@ protected: std::unordered_map texture_types; std::unordered_map cbuf_values; + u32 local_memory_size{}; + u32 texture_bound{}; + u32 shared_memory_size{}; + std::array workgroup_size{}; + u32 read_lowest = std::numeric_limits::max(); u32 read_highest = 0; @@ -270,6 +284,10 @@ public: UNREACHABLE_MSG("Invalid program={}", program); break; } + const u64 local_size{sph.LocalMemorySize()}; + ASSERT(local_size <= std::numeric_limits::max()); + local_memory_size = static_cast(local_size); + texture_bound = maxwell3d->regs.tex_cb_index; } ~GraphicsEnvironment() override = default; @@ -294,24 +312,6 @@ public: cbuf.address, cbuf.size, cbuf_index, cbuf_offset); } - u32 TextureBoundBuffer() const override { - return maxwell3d->regs.tex_cb_index; - } - - u32 LocalMemorySize() const override { - const u64 size{sph.LocalMemorySize()}; - ASSERT(size <= std::numeric_limits::max()); - return static_cast(size); - } - - u32 SharedMemorySize() const override { - throw Shader::LogicError("Requesting shared memory size in graphics stage"); - } - - std::array WorkgroupSize() const override { - throw Shader::LogicError("Requesting workgroup size in a graphics stage"); - } - private: Tegra::Engines::Maxwell3D* maxwell3d{}; size_t stage_index{}; @@ -325,7 +325,12 @@ public: u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ &kepler_compute_} { + const auto& qmd{kepler_compute->launch_description}; stage = Shader::Stage::Compute; + local_memory_size = qmd.local_pos_alloc; + texture_bound = kepler_compute->regs.tex_cb_index; + shared_memory_size = qmd.shared_alloc; + workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; } ~ComputeEnvironment() override = default; @@ -351,25 +356,6 @@ public: cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); } - u32 TextureBoundBuffer() const override { - return kepler_compute->regs.tex_cb_index; - } - - u32 LocalMemorySize() const override { - const auto& qmd{kepler_compute->launch_description}; - return qmd.local_pos_alloc; - } - - u32 SharedMemorySize() const override { - const auto& qmd{kepler_compute->launch_description}; - return qmd.shared_alloc; - } - - std::array WorkgroupSize() const override { - const auto& qmd{kepler_compute->launch_description}; - return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; - } - private: Tegra::Engines::KeplerCompute* kepler_compute{}; }; @@ -621,7 +607,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, - workers(11, "yuzu:PipelineBuilder") { + workers(11, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ @@ -796,7 +782,6 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); std::array graphics_envs; - boost::container::static_vector generic_envs; boost::container::static_vector envs; const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; @@ -810,13 +795,22 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); - generic_envs.push_back(&env); envs.push_back(&env); } auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; - if (!pipeline_cache_filename.empty()) { - SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); - } + if (pipeline_cache_filename.empty()) { + return pipeline; + } + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { + boost::container::static_vector + env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] != u128{}) { + env_ptrs.push_back(&envs[index]); + } + } + SerializePipeline(key, env_ptrs, pipeline_cache_filename); + }); return pipeline; } @@ -830,8 +824,10 @@ std::unique_ptr PipelineCache::CreateComputePipeline( main_pools.ReleaseContents(); auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { - SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + serialization_thread.QueueWork([this, key, env = std::move(env)] { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); + }); } return pipeline; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 609f00898..343ea1554 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -187,6 +187,7 @@ private: std::string pipeline_cache_filename; Common::ThreadWorker workers; + Common::ThreadWorker serialization_thread; }; } // namespace Vulkan -- cgit v1.2.3 From d819ba4489b90955286341c739083e638173b938 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 08:34:45 +0200 Subject: shader: Implement ViewportIndex --- src/shader_recompiler/backend/spirv/emit_context.cpp | 6 ++++++ src/shader_recompiler/backend/spirv/emit_context.h | 3 +++ src/shader_recompiler/backend/spirv/emit_spirv.cpp | 10 ++++++++++ .../backend/spirv/emit_spirv_context_get_set.cpp | 10 ++++++++-- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 8 files changed, 33 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3b3fea50c..a8041aadc 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -509,6 +509,12 @@ void EmitContext::DefineOutputs(const Info& info) { const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); } + if (info.stores_viewport_index && !ignore_viewport_layer) { + if (stage == Stage::Fragment) { + throw NotImplementedException("Storing ViewportIndex in Fragment stage"); + } + viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex); + } for (size_t i = 0; i < info.stores_generics.size(); ++i) { if (info.stores_generics[i]) { output_generics[i] = DefineOutput(*this, F32[4]); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index dc4e1227a..1573c2560 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -106,6 +106,7 @@ public: Id front_face{}; Id point_coord{}; Id clip_distances{}; + Id viewport_index{}; Id fswzadd_lut_a{}; Id fswzadd_lut_b{}; @@ -133,6 +134,8 @@ public: std::vector interfaces; + bool ignore_viewport_layer{}; + private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index b8e3b8527..cc6b98f7e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -225,6 +225,16 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } + if (info.stores_viewport_index) { + ctx.AddCapability(spv::Capability::MultiViewport); + if (profile.support_viewport_index_layer_non_geometry && + ctx.stage == Shader::Stage::VertexB) { + ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); + ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); + } else { + ctx.ignore_viewport_layer = true; + } + } if (!profile.support_vertex_instance_id && (info.loads_instance_id || info.loads_vertex_id)) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 2eaeb29de..e42407f1f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -29,7 +29,7 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { throw InvalidArgument("Invalid attribute type {}", type); } -Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { +std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const u32 element{static_cast(attr) % 4}; const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; if (IR::IsGeneric(attr)) { @@ -57,6 +57,8 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const Id clip_num{ctx.Constant(ctx.U32[1], index)}; return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num); } + case IR::Attribute::ViewportIndex: + return ctx.ignore_viewport_layer ? std::nullopt : std::optional{ctx.viewport_index}; default: throw NotImplementedException("Read attribute {}", attr); } @@ -204,7 +206,11 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { - ctx.OpStore(OutputAttrPointer(ctx, attr), value); + auto output = OutputAttrPointer(ctx, attr); + if (!output) { + return; + } + ctx.OpStore(*output, value); } void EmitGetAttributeIndexed(EmitContext&) { diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 50ffc4c19..514de6838 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -81,6 +81,9 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ClipDistance7: info.stores_clip_distance = true; break; + case IR::Attribute::ViewportIndex: + info.stores_viewport_index = true; + break; default: throw NotImplementedException("Set attribute {}", attribute); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f4b94896c..e13cb948a 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -35,6 +35,7 @@ struct Profile { bool support_fp64_signed_zero_nan_preserve{}; bool support_explicit_workgroup_layout{}; bool support_vote{}; + bool support_viewport_index_layer_non_geometry{}; bool warp_size_potentially_larger_than_guest{}; // FClamp is broken and OpFMax + OpFMin should be used instead diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index a62ad1e79..a5bff40bb 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -82,6 +82,7 @@ struct Info { bool stores_position{}; bool stores_point_size{}; bool stores_clip_distance{}; + bool stores_viewport_index{}; bool uses_fp16{}; bool uses_fp64{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 79cd204c7..1f308eec2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -631,6 +631,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, + .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, -- cgit v1.2.3 From 12f5f320985824d1ebad587ebecb0f8406143ebc Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Thu, 1 Apr 2021 09:21:53 +0200 Subject: shader: Mark SSBOs as written when they are --- .../global_memory_to_storage_buffer_pass.cpp | 31 ++++++++++++++++++++-- src/shader_recompiler/shader_info.h | 1 + .../renderer_vulkan/vk_compute_pipeline.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- 4 files changed, 32 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 1faa1ec88..d4bae249b 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,7 @@ using StorageBufferSet = using StorageInstVector = boost::container::small_vector; using VisitedBlocks = boost::container::flat_set, boost::container::small_vector>; +using StorageWritesMap = std::map; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -69,6 +71,22 @@ bool IsGlobalMemory(const IR::Inst& inst) { } } +/// Returns true when the instruction is a global memory instruction +bool IsGlobalMemoryWrite(const IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::WriteGlobalS8: + case IR::Opcode::WriteGlobalU8: + case IR::Opcode::WriteGlobalS16: + case IR::Opcode::WriteGlobalU16: + case IR::Opcode::WriteGlobal32: + case IR::Opcode::WriteGlobal64: + case IR::Opcode::WriteGlobal128: + return true; + default: + return false; + } +} + /// Converts a global memory opcode to its storage buffer equivalent IR::Opcode GlobalToStorage(IR::Opcode opcode) { switch (opcode) { @@ -248,7 +266,7 @@ std::optional Track(IR::Block* block, const IR::Value& value, /// Collects the storage buffer used by a global memory instruction and the instruction itself void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, - StorageInstVector& to_replace) { + StorageInstVector& to_replace, StorageWritesMap& writes_map) { // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ @@ -277,6 +295,13 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s } } // Collect storage buffer and the instruction + const bool is_a_write = IsGlobalMemoryWrite(inst); + auto it = writes_map.find(*storage_buffer); + if (it == writes_map.end()) { + writes_map[*storage_buffer] = is_a_write; + } else { + it->second = it->second || is_a_write; + } storage_buffer_set.insert(*storage_buffer); to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, @@ -350,13 +375,14 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; + StorageWritesMap writes_map; for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { if (!IsGlobalMemory(inst)) { continue; } - CollectStorageBuffers(*block, inst, storage_buffers, to_replace); + CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_map); } } Info& info{program.info}; @@ -366,6 +392,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_index{storage_buffer.index}, .cbuf_offset{storage_buffer.offset}, .count{1}, + .is_written{writes_map[storage_buffer]}, }); ++storage_index; } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index a5bff40bb..d4d039eaf 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -59,6 +59,7 @@ struct StorageBufferDescriptor { u32 cbuf_index; u32 cbuf_offset; u32 count; + bool is_written; }; struct Info { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index fb19bb4b9..6707842ab 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -75,7 +75,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, true); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, desc.is_written); ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d17b79e02..e8c3a5624 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -163,7 +163,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - true); + desc.is_written); ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; -- cgit v1.2.3 From 480dc0d5e68fd1c79345e93216013a1d2e172c70 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 3 Apr 2021 02:27:25 +0200 Subject: vk_pipeline_cache: Small fixes to the pipeline cache --- .../renderer_vulkan/vk_pipeline_cache.cpp | 24 +++++++++++++--------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1f308eec2..3111165fb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -539,11 +539,13 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + { + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } } }); } else { @@ -558,11 +560,13 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + { + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } } }); } -- cgit v1.2.3 From 6ff2e9ba097f3619c21d2e547570e1fbaae8d6ee Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 05:19:13 -0300 Subject: vk_pipeline_cache: Remove unnecesary scope in pipeline cache locking --- .../renderer_vulkan/vk_pipeline_cache.cpp | 27 ++++++++++------------ 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3111165fb..f88ab67ae 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -539,13 +539,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - { - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } else { @@ -560,13 +558,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - { - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } @@ -635,7 +631,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), .support_vote = true, - .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_viewport_index_layer_non_geometry = + device.IsExtShaderViewportIndexLayerSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, -- cgit v1.2.3 From 5ed68e83db39e1f6790a625529f10f4e1d5a8f89 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 21:41:49 -0300 Subject: shader: Remove atomic flags and use mutex + cond variable for pipelines --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 15 ++++++++++----- src/video_core/renderer_vulkan/vk_compute_pipeline.h | 7 ++++++- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 14 ++++++++++---- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 7 ++++++- 4 files changed, 32 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6707842ab..0bb5b852d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -55,8 +55,9 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip .basePipelineHandle = 0, .basePipelineIndex = 0, }); - building_flag.test_and_set(); - building_flag.notify_all(); + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); @@ -75,7 +76,8 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, desc.is_written); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); @@ -112,9 +114,12 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, update_descriptor_queue, image_index); - if (!building_flag.test()) { + if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built - scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + }); } scheduler.Record([this](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 02da504f7..104e6cc85 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,6 +4,8 @@ #pragma once +#include +#include #include #include "common/common_types.h" @@ -47,7 +49,10 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; - std::atomic_flag building_flag{}; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e8c3a5624..67de3cb79 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -135,8 +135,10 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; MakePipeline(device, render_pass); - building_flag.test_and_set(); - building_flag.notify_all(); + + std::lock_guard lock{build_mutex}; + is_built = true; + build_condvar.notify_one(); }}; if (worker_thread) { worker_thread->QueueWork(std::move(func)); @@ -196,8 +198,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); - if (!building_flag.test()) { - scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + if (!is_built.load(std::memory_order::relaxed)) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { + std::unique_lock lock{build_mutex}; + build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + }); } if (scheduler.UpdateGraphicsPipeline(this)) { scheduler.Record([this](vk::CommandBuffer cmdbuf) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4e0583157..7d14d2378 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -6,6 +6,8 @@ #include #include +#include +#include #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" @@ -63,7 +65,10 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; - std::atomic_flag building_flag{}; + + std::condition_variable build_condvar; + std::mutex build_mutex; + std::atomic_bool is_built{false}; }; } // namespace Vulkan -- cgit v1.2.3 From 5b3c6d59c2c92ac388530740f8008f1b9764c14d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 3 Apr 2021 22:28:07 -0300 Subject: vk_compute_pass: Fix compute passes --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 39 ++++++++++------------ .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 ++ src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 - 3 files changed, 19 insertions(+), 23 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 760857839..2cfe9d4bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,27 +206,23 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); - /* - FIXME pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, .layout = *layout, .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); - */ } VKComputePass::~VKComputePass() = default; @@ -262,8 +258,7 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, - num_vertices](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -271,8 +266,8 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); @@ -319,8 +314,8 @@ std::pair QuadIndexedPass::Assemble( const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, - num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer = staging.buffer, set, num_tri_vertices, base_vertex, + index_shift](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -329,9 +324,9 @@ std::pair QuadIndexedPass::Assemble( .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; const std::array push_constants = {base_vertex, index_shift}; - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); - cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants); cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 67de3cb79..a0ef0e98b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -189,6 +189,8 @@ void GraphicsPipeline::Configure(bool is_indexed) { buffer_cache.BindHostGeometryBuffers(is_indexed); + update_descriptor_queue.Acquire(); + size_t index{}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { buffer_cache.BindHostStageBuffers(stage); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f0bd4b8af..0292a1b94 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -172,7 +172,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { if (!pipeline) { return; } - update_descriptor_queue.Acquire(); std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; pipeline->Configure(is_indexed); -- cgit v1.2.3 From 72daa2a039d58d23b0dca035bb5f6af8b10ce97b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 5 Apr 2021 08:56:58 +0200 Subject: shader: Fix ShadowCube declaration type, set number of pipeline threads based on hardware --- src/shader_recompiler/backend/spirv/emit_context.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 5ef637fe7..002b305dc 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -45,7 +45,7 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { case TextureType::ShadowCube: return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format); case TextureType::ShadowArrayCube: - return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Cube, true, true, false, 1, format); } throw InvalidArgument("Invalid texture type {}", desc.type); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f88ab67ae..088de7001 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "common/bit_cast.h" @@ -607,7 +608,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, - workers(11, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { + workers(std::thread::hardware_concurrency() - 1, "yuzu:PipelineBuilder"), + serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ -- cgit v1.2.3 From bfeeb23ddce9f3531a834c257bd8af05c42ed194 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 5 Apr 2021 19:15:45 -0300 Subject: vk_pipeline_cache: Fix num of pipeline workers on weird platforms --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 088de7001..25f592b8a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -608,7 +608,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, - workers(std::thread::hardware_concurrency() - 1, "yuzu:PipelineBuilder"), + workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; -- cgit v1.2.3 From 1f3eb601acdcdfa4c119cffbf36b5792147b893f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 02:56:15 -0300 Subject: shader: Implement texture buffers --- .../backend/spirv/emit_context.cpp | 29 ++++++++ src/shader_recompiler/backend/spirv/emit_context.h | 5 ++ src/shader_recompiler/backend/spirv/emit_spirv.cpp | 1 + .../backend/spirv/emit_spirv_image.cpp | 24 +++++-- src/shader_recompiler/ir_opt/texture_pass.cpp | 80 +++++++++++++++++----- src/shader_recompiler/shader_info.h | 9 +++ src/video_core/renderer_vulkan/pipeline_helper.h | 10 +++ .../renderer_vulkan/vk_compute_pipeline.cpp | 15 ++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 14 ++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 10 files changed, 154 insertions(+), 35 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2d39ea373..d01633628 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -46,6 +46,8 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format); case TextureType::ShadowArrayCube: return ctx.TypeImage(type, spv::Dim::Cube, true, true, false, 1, format); + case TextureType::Buffer: + break; } throw InvalidArgument("Invalid texture type {}", desc.type); } @@ -129,6 +131,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextures(program.info, binding); + DefineTextureBuffers(program.info, binding); DefineAttributeMemAccess(program.info); DefineLabels(program); } @@ -541,6 +544,32 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { } } +void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { + if (info.texture_buffer_descriptors.empty()) { + return; + } + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); + sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); + + const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of texture buffers"); + } + const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + texture_buffers.insert(texture_buffers.end(), desc.count, id); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + binding += desc.count; + } +} + void EmitContext::DefineLabels(IR::Program& program) { for (IR::Block* const block : program.blocks) { block->SetDefinition(OpLabel()); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 7a2ac0511..2a10e94e5 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -90,9 +90,13 @@ public: Id storage_u32{}; + Id image_buffer_type{}; + Id sampled_texture_buffer_type{}; + std::array cbufs{}; std::array ssbos{}; std::vector textures; + std::vector texture_buffers; Id workgroup_id{}; Id local_invocation_id{}; @@ -151,6 +155,7 @@ private: void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); + void DefineTextureBuffers(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); void DefineLabels(IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 191380db0..32512a0e5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -249,6 +249,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); ctx.AddCapability(spv::Capability::ImageQuery); + ctx.AddCapability(spv::Capability::SampledBuffer); } Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index fc40615af..525f67c6e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -128,12 +128,18 @@ Id Texture(EmitContext& ctx, const IR::Value& index) { throw NotImplementedException("Indirect texture sample"); } -Id TextureImage(EmitContext& ctx, const IR::Value& index) { - if (index.IsImmediate()) { +Id TextureImage(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { + if (!index.IsImmediate()) { + throw NotImplementedException("Indirect texture sample"); + } + if (info.type == TextureType::Buffer) { + const Id sampler_id{ctx.texture_buffers.at(index.U32())}; + const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)}; + return ctx.OpImage(ctx.image_buffer_type, id); + } else { const TextureDefinition def{ctx.textures.at(index.U32())}; return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); } - throw NotImplementedException("Indirect texture sample"); } Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) { @@ -297,17 +303,22 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } +#pragma optimize("", off) + Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms) { const auto info{inst->Flags()}; + if (info.type == TextureType::Buffer) { + lod = Id{}; + } const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], - TextureImage(ctx, index), coords, operands.Mask(), operands.Span()); + TextureImage(ctx, index, info), coords, operands.Mask(), operands.Span()); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { const auto info{inst->Flags()}; - const Id image{TextureImage(ctx, index)}; + const Id image{TextureImage(ctx, index, info)}; const Id zero{ctx.u32_zero_value}; const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; switch (info.type) { @@ -331,6 +342,9 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i case TextureType::ShadowArrayCube: return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), mips()); + case TextureType::Buffer: + return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero, + zero, mips()); } throw LogicError("Unspecified image type {}", info.type.Value()); } diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index bcb94ce4d..290ce4179 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -147,24 +147,39 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { class Descriptors { public: - explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {} + explicit Descriptors(TextureDescriptors& texture_descriptors_, + TextureBufferDescriptors& texture_buffer_descriptors_) + : texture_descriptors{texture_descriptors_}, texture_buffer_descriptors{ + texture_buffer_descriptors_} {} + + u32 Add(const TextureDescriptor& desc) { + return Add(texture_descriptors, desc, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.type == existing.type; + }); + } + + u32 Add(const TextureBufferDescriptor& desc) { + return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset; + }); + } - u32 Add(const TextureDescriptor& descriptor) { +private: + template + static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { // TODO: Handle arrays - auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) { - return descriptor.cbuf_index == existing.cbuf_index && - descriptor.cbuf_offset == existing.cbuf_offset && - descriptor.type == existing.type; - })}; + const auto it{std::ranges::find_if(descriptors, pred)}; if (it != descriptors.end()) { return static_cast(std::distance(descriptors.begin(), it)); } - descriptors.push_back(descriptor); + descriptors.push_back(desc); return static_cast(descriptors.size()) - 1; } -private: - TextureDescriptors& descriptors; + TextureDescriptors& texture_descriptors; + TextureBufferDescriptors& texture_buffer_descriptors; }; } // Anonymous namespace @@ -185,7 +200,10 @@ void TexturePass(Environment& env, IR::Program& program) { std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { return lhs.cbuf.index < rhs.cbuf.index; }); - Descriptors descriptors{program.info.texture_descriptors}; + Descriptors descriptors{ + program.info.texture_descriptors, + program.info.texture_buffer_descriptors, + }; for (TextureInst& texture_inst : to_replace) { // TODO: Handle arrays IR::Inst* const inst{texture_inst.inst}; @@ -193,16 +211,42 @@ void TexturePass(Environment& env, IR::Program& program) { const auto& cbuf{texture_inst.cbuf}; auto flags{inst->Flags()}; - if (inst->Opcode() == IR::Opcode::ImageQueryDimensions) { + switch (inst->Opcode()) { + case IR::Opcode::ImageQueryDimensions: flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); inst->SetFlags(flags); + break; + case IR::Opcode::ImageFetch: + if (flags.type != TextureType::Color1D) { + break; + } + if (env.ReadTextureType(cbuf.index, cbuf.offset) == TextureType::Buffer) { + // Replace with the bound texture type only when it's a texture buffer + // If the instruction is 1D and the bound type is 2D, don't change the code and let + // the rasterizer robustness handle it + // This happens on Fire Emblem: Three Houses + flags.type.Assign(TextureType::Buffer); + } + inst->SetFlags(flags); + break; + default: + break; + } + u32 index; + if (flags.type == TextureType::Buffer) { + index = descriptors.Add(TextureBufferDescriptor{ + .cbuf_index{cbuf.index}, + .cbuf_offset{cbuf.offset}, + .count{1}, + }); + } else { + index = descriptors.Add(TextureDescriptor{ + .type{flags.type}, + .cbuf_index{cbuf.index}, + .cbuf_offset{cbuf.offset}, + .count{1}, + }); } - const u32 index{descriptors.Add(TextureDescriptor{ - .type{flags.type}, - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, - })}; inst->SetArg(0, IR::Value{index}); } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 41bb5b9a1..e6f0de8d8 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -29,6 +29,7 @@ enum class TextureType : u32 { Shadow3D, ShadowCube, ShadowArrayCube, + Buffer, }; enum class Interpolation { @@ -50,6 +51,13 @@ struct TextureDescriptor { }; using TextureDescriptors = boost::container::small_vector; +struct TextureBufferDescriptor { + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; +using TextureBufferDescriptors = boost::container::small_vector; + struct ConstantBufferDescriptor { u32 index; u32 count; @@ -112,6 +120,7 @@ struct Info { constant_buffer_descriptors; boost::container::static_vector storage_buffers_descriptors; TextureDescriptors texture_descriptors; + TextureBufferDescriptors texture_buffer_descriptors; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index eebe5d569..decf0d32c 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -93,6 +93,9 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } + for (const auto& desc : info.texture_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); + } } private: @@ -146,6 +149,8 @@ inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { case Shader::TextureType::ColorArrayCube: case Shader::TextureType::ShadowArrayCube: return VideoCommon::ImageViewType::CubeArray; + case Shader::TextureType::Buffer: + break; } UNREACHABLE_MSG("Invalid texture type {}", type); return {}; @@ -161,6 +166,11 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samp update_descriptor_queue.AddSampledImage(vk_image_view, sampler); ++index; } + for (const auto& desc : info.texture_buffer_descriptors) { + ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); + ++index; + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 0bb5b852d..9922cbd0f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -93,20 +93,23 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto& launch_desc{kepler_compute.launch_description}; const auto& cbufs{launch_desc.const_buffer_config}; const bool via_header_index{launch_desc.linked_tsc}; - for (const auto& desc : info.texture_descriptors) { - const u32 cbuf_index{desc.cbuf_index}; - const u32 cbuf_offset{desc.cbuf_offset}; + const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); - const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; const u32 raw_handle{gpu_memory.Read(addr)}; - - const TextureHandle handle(raw_handle, via_header_index); + return TextureHandle(raw_handle, via_header_index); + }}; + for (const auto& desc : info.texture_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a0ef0e98b..afdd8b371 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -169,19 +169,23 @@ void GraphicsPipeline::Configure(bool is_indexed) { ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - for (const auto& desc : info.texture_descriptors) { - const u32 cbuf_index{desc.cbuf_index}; - const u32 cbuf_offset{desc.cbuf_offset}; + const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { ASSERT(cbufs[cbuf_index].enabled); const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; const u32 raw_handle{gpu_memory.Read(addr)}; - - const TextureHandle handle(raw_handle, via_header_index); + return TextureHandle(raw_handle, via_header_index); + }}; + for (const auto& desc : info.texture_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); buffer_cache.UpdateGraphicsBuffers(is_indexed); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 25f592b8a..23bf84a92 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -212,7 +212,7 @@ protected: case Tegra::Texture::TextureType::Texture2DArray: return Shader::TextureType::ColorArray2D; case Tegra::Texture::TextureType::Texture1DBuffer: - throw Shader::NotImplementedException("Texture buffer"); + return Shader::TextureType::Buffer; case Tegra::Texture::TextureType::TextureCubeArray: return Shader::TextureType::ColorArrayCube; default: -- cgit v1.2.3 From e9a91bc5cc2c39b476ba8946f66930f5ab5608b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 20:14:55 -0300 Subject: shader: Interact texture buffers with buffer cache --- .../backend/spirv/emit_context.cpp | 54 ++++---- src/shader_recompiler/backend/spirv/emit_context.h | 2 +- src/shader_recompiler/shader_info.h | 2 +- src/video_core/buffer_cache/buffer_cache.h | 138 +++++++++++++++++++++ src/video_core/renderer_opengl/gl_buffer_cache.h | 1 + .../renderer_opengl/gl_texture_cache.cpp | 4 + src/video_core/renderer_opengl/gl_texture_cache.h | 2 + src/video_core/renderer_vulkan/pipeline_helper.h | 26 ++-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 57 ++++++--- src/video_core/renderer_vulkan/vk_buffer_cache.h | 18 +++ .../renderer_vulkan/vk_compute_pipeline.cpp | 30 +++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 31 +++-- .../renderer_vulkan/vk_texture_cache.cpp | 63 ++-------- src/video_core/renderer_vulkan/vk_texture_cache.h | 30 ++--- src/video_core/texture_cache/image_view_base.cpp | 9 ++ src/video_core/texture_cache/image_view_base.h | 1 + src/video_core/texture_cache/texture_cache.h | 13 +- 17 files changed, 333 insertions(+), 148 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index d01633628..b738e00cc 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -130,8 +130,8 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineSharedMemory(program); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); - DefineTextures(program.info, binding); DefineTextureBuffers(program.info, binding); + DefineTextures(program.info, binding); DefineAttributeMemAccess(program.info); DefineLabels(program); } @@ -516,6 +516,32 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { } } +void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { + if (info.texture_buffer_descriptors.empty()) { + return; + } + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); + sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); + + const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of texture buffers"); + } + const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + texture_buffers.insert(texture_buffers.end(), desc.count, id); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + binding += desc.count; + } +} + void EmitContext::DefineTextures(const Info& info, u32& binding) { textures.reserve(info.texture_descriptors.size()); for (const TextureDescriptor& desc : info.texture_descriptors) { @@ -544,32 +570,6 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { } } -void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { - if (info.texture_buffer_descriptors.empty()) { - return; - } - const spv::ImageFormat format{spv::ImageFormat::Unknown}; - image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); - sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); - - const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; - texture_buffers.reserve(info.texture_buffer_descriptors.size()); - for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { - if (desc.count != 1) { - throw NotImplementedException("Array of texture buffers"); - } - const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; - Decorate(id, spv::Decoration::Binding, binding); - Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - texture_buffers.insert(texture_buffers.end(), desc.count, id); - if (profile.supported_spirv >= 0x00010400) { - interfaces.push_back(id); - } - binding += desc.count; - } -} - void EmitContext::DefineLabels(IR::Program& program) { for (IR::Block* const block : program.blocks) { block->SetDefinition(OpLabel()); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 2a10e94e5..f1ac4430c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -154,8 +154,8 @@ private: void DefineSharedMemory(const IR::Program& program); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); - void DefineTextures(const Info& info, u32& binding); void DefineTextureBuffers(const Info& info, u32& binding); + void DefineTextures(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); void DefineLabels(IR::Program& program); diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index e6f0de8d8..4cc731198 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -119,8 +119,8 @@ struct Info { boost::container::static_vector constant_buffer_descriptors; boost::container::static_vector storage_buffers_descriptors; - TextureDescriptors texture_descriptors; TextureBufferDescriptors texture_buffer_descriptors; + TextureDescriptors texture_descriptors; }; } // namespace Shader diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7373cb62d..6701aab82 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -31,6 +31,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" @@ -42,11 +43,14 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; +using VideoCore::Surface::PixelFormat; + constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; constexpr u32 NUM_STORAGE_BUFFERS = 16; +constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; using namespace Common::Literals; @@ -66,6 +70,7 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -96,6 +101,10 @@ class BufferCache { BufferId buffer_id; }; + struct TextureBufferBinding : Binding { + PixelFormat format; + }; + static constexpr Binding NULL_BINDING{ .cpu_addr = 0, .size = 0, @@ -142,11 +151,21 @@ public: void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindGraphicsTextureBuffers(size_t stage); + + void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format); + void UnbindComputeStorageBuffers(); void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindComputeTextureBuffers(); + + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format); + void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded @@ -254,12 +273,16 @@ private: void BindHostGraphicsStorageBuffers(size_t stage); + void BindHostGraphicsTextureBuffers(size_t stage); + void BindHostTransformFeedbackBuffers(); void BindHostComputeUniformBuffers(); void BindHostComputeStorageBuffers(); + void BindHostComputeTextureBuffers(); + void DoUpdateGraphicsBuffers(bool is_indexed); void DoUpdateComputeBuffers(); @@ -274,6 +297,8 @@ private: void UpdateStorageBuffers(size_t stage); + void UpdateTextureBuffers(size_t stage); + void UpdateTransformFeedbackBuffers(); void UpdateTransformFeedbackBuffer(u32 index); @@ -282,6 +307,8 @@ private: void UpdateComputeStorageBuffers(); + void UpdateComputeTextureBuffers(); + void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); @@ -323,6 +350,9 @@ private: [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; + [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, + PixelFormat format); + [[nodiscard]] std::span ImmediateBufferWithData(VAddr cpu_addr, size_t size); [[nodiscard]] std::span ImmediateBuffer(size_t wanted_capacity); @@ -347,10 +377,12 @@ private: std::array vertex_buffers; std::array, NUM_STAGES> uniform_buffers; std::array, NUM_STAGES> storage_buffers; + std::array, NUM_STAGES> texture_buffers; std::array transform_feedback_buffers; std::array compute_uniform_buffers; std::array compute_storage_buffers; + std::array compute_texture_buffers; std::array enabled_uniform_buffers{}; u32 enabled_compute_uniform_buffers = 0; @@ -360,6 +392,9 @@ private: u32 enabled_compute_storage_buffers = 0; u32 written_compute_storage_buffers = 0; + std::array enabled_texture_buffers{}; + u32 enabled_compute_texture_buffers = 0; + std::array fast_bound_uniform_buffers{}; std::array uniform_cache_hits{}; @@ -619,6 +654,7 @@ void BufferCache

::BindHostStageBuffers(size_t stage) { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostGraphicsUniformBuffers(stage); BindHostGraphicsStorageBuffers(stage); + BindHostGraphicsTextureBuffers(stage); } template @@ -626,6 +662,7 @@ void BufferCache

::BindHostComputeBuffers() { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostComputeUniformBuffers(); BindHostComputeStorageBuffers(); + BindHostComputeTextureBuffers(); } template @@ -660,6 +697,18 @@ void BufferCache

::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr); } +template +void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { + enabled_texture_buffers[stage] = 0; +} + +template +void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, + u32 size, PixelFormat format) { + enabled_texture_buffers[stage] |= 1U << tbo_index; + texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + template void BufferCache

::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; @@ -680,6 +729,18 @@ void BufferCache

::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr); } +template +void BufferCache

::UnbindComputeTextureBuffers() { + enabled_compute_texture_buffers = 0; +} + +template +void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format) { + enabled_compute_texture_buffers |= 1U << tbo_index; + compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + template void BufferCache

::FlushCachedWrites() { for (const BufferId buffer_id : cached_write_buffer_ids) { @@ -988,6 +1049,26 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { }); } +template +void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { + u32 binding_index = 0; + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + const TextureBufferBinding& binding = texture_buffers[stage][index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { + runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); + ++binding_index; + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + template void BufferCache

::BindHostTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { @@ -1050,6 +1131,26 @@ void BufferCache

::BindHostComputeStorageBuffers() { }); } +template +void BufferCache

::BindHostComputeTextureBuffers() { + u32 binding_index = 0; + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + const TextureBufferBinding& binding = compute_texture_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { + runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); + ++binding_index; + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + template void BufferCache

::DoUpdateGraphicsBuffers(bool is_indexed) { if (is_indexed) { @@ -1060,6 +1161,7 @@ void BufferCache

::DoUpdateGraphicsBuffers(bool is_indexed) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { UpdateUniformBuffers(stage); UpdateStorageBuffers(stage); + UpdateTextureBuffers(stage); } } @@ -1067,6 +1169,7 @@ template void BufferCache

::DoUpdateComputeBuffers() { UpdateComputeUniformBuffers(); UpdateComputeStorageBuffers(); + UpdateComputeTextureBuffers(); } template @@ -1166,6 +1269,14 @@ void BufferCache

::UpdateStorageBuffers(size_t stage) { }); } +template +void BufferCache

::UpdateTextureBuffers(size_t stage) { + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + Binding& binding = texture_buffers[stage][index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + }); +} + template void BufferCache

::UpdateTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { @@ -1227,6 +1338,14 @@ void BufferCache

::UpdateComputeStorageBuffers() { }); } +template +void BufferCache

::UpdateComputeTextureBuffers() { + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + Binding& binding = compute_texture_buffers[index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + }); +} + template void BufferCache

::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { Buffer& buffer = slot_buffers[buffer_id]; @@ -1581,6 +1700,25 @@ typename BufferCache

::Binding BufferCache

::StorageBufferBinding(GPUVAddr s return binding; } +template +typename BufferCache

::TextureBufferBinding BufferCache

::GetTextureBufferBinding( + GPUVAddr gpu_addr, u32 size, PixelFormat format) { + const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + TextureBufferBinding binding; + if (!cpu_addr || size == 0) { + binding.cpu_addr = 0; + binding.size = 0; + binding.buffer_id = NULL_BUFFER_ID; + binding.format = PixelFormat::Invalid; + } else { + binding.cpu_addr = *cpu_addr; + binding.size = size; + binding.buffer_id = BufferId{}; + binding.format = format; + } + return binding; +} + template std::span BufferCache

::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index fe91aa452..ddcce5e97 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -155,6 +155,7 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ff0f03e99..a8bf84218 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1016,6 +1016,10 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info) + : VideoCommon::ImageViewBase{info, view_info} {} + ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index cf3b789e3..817b0e650 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -182,6 +182,8 @@ class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index decf0d32c..cff93cc60 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -24,7 +24,8 @@ struct TextureHandle { [[likely]] if (via_header_index) { image = data; sampler = data; - } else { + } + else { const Tegra::Texture::TextureHandle handle{data}; image = handle.tic_id; sampler = via_header_index ? image : handle.tsc_id.Value(); @@ -90,12 +91,12 @@ public: for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); } + for ([[maybe_unused]] const auto& desc : info.texture_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); + } for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } - for (const auto& desc : info.texture_buffer_descriptors) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); - } } private: @@ -156,20 +157,15 @@ inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { return {}; } -inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers, - const ImageId* image_view_ids, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) { +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, + const ImageId*& image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue) { + image_view_ids += info.texture_buffer_descriptors.size(); for (const auto& desc : info.texture_descriptors) { - const VkSampler sampler{samplers[index]}; - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); - ++index; - } - for (const auto& desc : info.texture_buffer_descriptors) { - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - ++index; } } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 0def1e769..cdda56ab1 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -67,25 +67,50 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) - : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_) { - buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); + : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_), + device{&runtime.device}, + buffer{device->GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = SizeBytes(), + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + })}, + commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); } - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); +} + +VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return *it->handle; + } + views.push_back({ + .offset = offset, + .size = size, + .format = format, + .handle = device->GetLogical().CreateBufferView({ + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = *buffer, + .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format, + .offset = offset, + .range = size, + }), + }); + return *views.back().handle; } BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3bb81d5b3..ea17406dc 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -9,6 +9,7 @@ #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/surface.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -26,6 +27,8 @@ public: explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_); + [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] VkBuffer Handle() const noexcept { return *buffer; } @@ -35,8 +38,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + vk::BufferView handle; + }; + + const Device* device{}; vk::Buffer buffer; MemoryCommit commit; + std::vector views; }; class BufferCacheRuntime { @@ -87,6 +99,11 @@ public: BindBuffer(buffer, offset, size); } + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format) { + update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); + } + private: void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { update_descriptor_queue.AddBuffer(buffer, offset, size); @@ -123,6 +140,7 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false; static constexpr bool USE_MEMORY_MAPS = true; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9922cbd0f..ac47b1f3c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -80,8 +80,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, desc.is_written); ++ssbo_index; } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); texture_cache.SynchronizeComputeDescriptors(); @@ -99,6 +97,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -106,16 +108,26 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.texture_buffer_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); - size_t image_index{}; - PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, - update_descriptor_queue, image_index); + buffer_cache.UnbindComputeTextureBuffers(); + ImageId* texture_buffer_ids{image_view_ids.data()}; + size_t index{}; + for (const auto& desc : info.texture_buffer_descriptors) { + ASSERT(desc.count == 1); + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), + image_view.format); + ++texture_buffer_ids; + ++index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); + + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; + PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); if (!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index afdd8b371..893258b4a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -175,6 +175,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -182,24 +186,37 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + } + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + size_t index{}; for (const auto& desc : info.texture_buffer_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); + ASSERT(desc.count == 1); + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_index); + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format); + ++index; + ++texture_buffer_index; } + texture_buffer_index += info.texture_descriptors.size(); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); buffer_cache.BindHostGeometryBuffers(is_indexed); update_descriptor_queue.Acquire(); - size_t index{}; + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), - texture_cache, update_descriptor_queue, index); + PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, + update_descriptor_queue); } texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1bbc542a1..e42b091c5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -15,10 +15,10 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -162,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); } -[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { - if (info.type != ImageType::Buffer) { - return vk::Buffer{}; - } - const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); - return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = info.size.width * bytes_per_block, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); -} - [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { switch (VideoCore::Surface::GetFormatType(format)) { case VideoCore::Surface::SurfaceType::ColorTexture: @@ -813,13 +794,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, - image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), + image(MakeImage(runtime.device, info)), + commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), aspect_mask(ImageAspectMask(info.format)) { - if (image) { - commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal); - } else { - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); - } if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { if (Settings::values.accelerate_astc.GetValue()) { flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; @@ -828,11 +805,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } if (runtime.device.HasDebuggingToolAttached()) { - if (image) { - image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } else { - buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } + image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, @@ -884,19 +857,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { - // TODO: Move this to another API - scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferCopies(copies, map.offset); - const VkBuffer src_buffer = map.buffer; - const VkBuffer dst_buffer = *buffer; - scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { - // TODO: Barriers - cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); - }); -} - void Image::DownloadMemory(const StagingBufferRef& map, std::span copies) { std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); @@ -1032,19 +992,16 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI UNIMPLEMENTED(); break; case VideoCommon::ImageViewType::Buffer: - buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .buffer = image.Buffer(), - .format = format_info.format, - .offset = 0, // TODO: Redesign buffer cache to support this - .range = image.guest_size_bytes, - }); + UNREACHABLE(); break; } } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params} {} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 189ee5a68..498e76a1c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -41,9 +41,9 @@ struct TextureCacheRuntime { void Finish(); - [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); + StagingBufferRef UploadStagingBuffer(size_t size); - [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); + StagingBufferRef DownloadStagingBuffer(size_t size); void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const Region2D& dst_region, const Region2D& src_region, @@ -54,7 +54,7 @@ struct TextureCacheRuntime { void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); - [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { + bool CanAccelerateImageUpload(Image&) const noexcept { return false; } @@ -92,8 +92,6 @@ public: void UploadMemory(const StagingBufferRef& map, std::span copies); - void UploadMemory(const StagingBufferRef& map, std::span copies); - void DownloadMemory(const StagingBufferRef& map, std::span copies); @@ -101,10 +99,6 @@ public: return *image; } - [[nodiscard]] VkBuffer Buffer() const noexcept { - return *buffer; - } - [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { return aspect_mask; } @@ -121,7 +115,6 @@ public: private: VKScheduler* scheduler; vk::Image image; - vk::Buffer buffer; MemoryCommit commit; vk::ImageView image_view; std::vector storage_image_views; @@ -132,6 +125,8 @@ private: class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); [[nodiscard]] VkImageView DepthView(); @@ -142,10 +137,6 @@ public: return *image_views[static_cast(query_type)]; } - [[nodiscard]] VkBufferView BufferView() const noexcept { - return *buffer_view; - } - [[nodiscard]] VkImage ImageHandle() const noexcept { return image_handle; } @@ -162,6 +153,14 @@ public: return samples; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); @@ -169,11 +168,12 @@ private: std::array image_views; vk::ImageView depth_view; vk::ImageView stencil_view; - vk::BufferView buffer_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; PixelFormat image_format = PixelFormat::Invalid; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index e8d632f9e..450becbeb 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -36,6 +36,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i } } +ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) + : format{info.format}, type{ImageViewType::Buffer}, size{ + .width = info.size.width, + .height = 1, + .depth = 1, + } { + ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); +} + ImageViewBase::ImageViewBase(const NullImageParams&) {} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 73954167e..903f715c5 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -27,6 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) struct ImageViewBase { explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id); + explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); explicit ImageViewBase(const NullImageParams&); [[nodiscard]] bool IsBuffer() const noexcept { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85ce06d56..5e8d99482 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -968,9 +968,6 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); ConvertImage(unswizzled_data, image.info, mapped_span, copies); image.UploadMemory(staging, copies); - } else if (image.info.type == ImageType::Buffer) { - const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; - image.UploadMemory(staging, copies); } else { const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); image.UploadMemory(staging, copies); @@ -993,7 +990,12 @@ ImageViewId TextureCache

::FindImageView(const TICEntry& config) { template ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { const ImageInfo info(config); - const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; + if (info.type == ImageType::Buffer) { + const ImageViewInfo view_info(config, 0); + return slot_image_views.insert(runtime, info, view_info, config.Address()); + } + const u32 layer_offset = config.BaseLayer() * info.layer_stride; + const GPUVAddr image_gpu_addr = config.Address() - layer_offset; const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); if (!image_id) { return NULL_IMAGE_VIEW_ID; @@ -1801,6 +1803,9 @@ void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modifi return; } const ImageViewBase& image_view = slot_image_views[image_view_id]; + if (image_view.IsBuffer()) { + return; + } PrepareImage(image_view.image_id, is_modification, invalidate); } -- cgit v1.2.3 From 0bb85f6a753c769266c95c4ba146b25b9eaaaffd Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Mon, 5 Apr 2021 22:25:22 -0400 Subject: shader_recompiler,video_core: Cleanup some GCC and Clang errors Mostly fixing unused *, implicit conversion, braced scalar init, fpermissive, and some others. Some Clang errors likely remain in video_core, and std::ranges is still a pertinent issue in shader_recompiler shader_recompiler: cmake: Force bracket depth to 1024 on Clang Increases the maximum fold expression depth thread_worker: Include condition_variable Don't use list initializers in control flow Co-authored-by: ReinUsesLisp --- src/common/thread_worker.h | 1 + src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 4 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 19 +-- .../backend/spirv/emit_spirv_image.cpp | 11 +- .../backend/spirv/emit_spirv_warp.cpp | 2 +- src/shader_recompiler/file_environment.h | 2 +- src/shader_recompiler/frontend/ir/attribute.cpp | 4 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 +- src/shader_recompiler/frontend/ir/condition.cpp | 6 +- src/shader_recompiler/frontend/ir/condition.h | 4 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 +- .../frontend/ir/microinstruction.cpp | 16 +-- .../frontend/ir/microinstruction.h | 4 +- src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/ir/program.cpp | 2 - src/shader_recompiler/frontend/ir/value.cpp | 4 +- src/shader_recompiler/frontend/ir/value.h | 2 +- .../frontend/maxwell/control_flow.cpp | 140 +++++++++------------ src/shader_recompiler/frontend/maxwell/decode.cpp | 10 +- .../maxwell/indirect_branch_table_track.cpp | 10 +- .../frontend/maxwell/structured_control_flow.cpp | 3 +- .../frontend/maxwell/translate/impl/double_add.cpp | 6 +- .../translate/impl/double_fused_multiply_add.cpp | 6 +- .../maxwell/translate/impl/double_multiply.cpp | 6 +- .../maxwell/translate/impl/floating_point_add.cpp | 6 +- .../translate/impl/floating_point_compare.cpp | 3 +- .../impl/floating_point_compare_and_set.cpp | 6 +- .../floating_point_conversion_floating_point.cpp | 6 +- .../impl/floating_point_conversion_integer.cpp | 11 +- .../impl/floating_point_fused_multiply_add.cpp | 6 +- .../translate/impl/floating_point_min_max.cpp | 6 +- .../translate/impl/floating_point_multiply.cpp | 8 +- .../impl/floating_point_set_predicate.cpp | 6 +- .../translate/impl/floating_point_swizzled_add.cpp | 6 +- .../translate/impl/half_floating_point_add.cpp | 11 +- .../half_floating_point_fused_multiply_add.cpp | 11 +- .../impl/half_floating_point_multiply.cpp | 11 +- .../translate/impl/half_floating_point_set.cpp | 11 +- .../impl/half_floating_point_set_predicate.cpp | 12 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 +- .../maxwell/translate/impl/integer_add.cpp | 1 - .../impl/integer_floating_point_conversion.cpp | 4 +- .../maxwell/translate/impl/load_constant.cpp | 2 +- .../translate/impl/load_store_local_shared.cpp | 9 +- .../maxwell/translate/impl/load_store_memory.cpp | 4 +- .../maxwell/translate/impl/texture_fetch.cpp | 2 +- .../translate/impl/texture_fetch_swizzled.cpp | 2 +- .../translate/impl/texture_gather_swizzled.cpp | 2 +- .../translate/impl/texture_load_swizzled.cpp | 2 +- .../maxwell/translate/impl/texture_query.cpp | 2 +- .../maxwell/translate/impl/video_set_predicate.cpp | 1 - .../ir_opt/collect_shader_info_pass.cpp | 20 +-- .../ir_opt/constant_propagation_pass.cpp | 49 ++++---- .../global_memory_to_storage_buffer_pass.cpp | 42 +++---- .../ir_opt/identity_removal_pass.cpp | 3 +- .../ir_opt/lower_fp16_to_fp32.cpp | 2 +- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 4 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 32 ++--- src/shader_recompiler/ir_opt/verification_pass.cpp | 4 +- src/tests/common/unique_function.cpp | 2 + src/video_core/CMakeLists.txt | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 ++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 5 +- .../renderer_vulkan/vk_render_pass_cache.cpp | 2 - .../renderer_vulkan/vk_texture_cache.cpp | 2 +- 66 files changed, 308 insertions(+), 313 deletions(-) (limited to 'src/video_core') diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h index 0a975a869..cd0017726 100644 --- a/src/common/thread_worker.h +++ b/src/common/thread_worker.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 22639fe13..551bf1c58 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -196,6 +196,8 @@ else() $<$:-Werror=unused-but-set-parameter> $<$:-Werror=unused-but-set-variable> -Werror=unused-variable + + $<$:-fbracket-depth=1024> ) endif() diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index b738e00cc..0c114402b 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -116,7 +117,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie const std::string_view def_name_view( def_name.data(), fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); - defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); + defs[static_cast(i)] = + sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); } } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 32512a0e5..355cf0ca8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -16,7 +16,7 @@ namespace Shader::Backend::SPIRV { namespace { template -struct FuncTraits : FuncTraits {}; +struct FuncTraits {}; template struct FuncTraits { @@ -64,17 +64,20 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { template void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { using Traits = FuncTraits; - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { if constexpr (is_first_arg_inst) { - SetDefinition(ctx, inst, inst, Arg>(ctx, inst->Arg(I))...); + SetDefinition( + ctx, inst, inst, + Arg>(ctx, inst->Arg(I))...); } else { - SetDefinition(ctx, inst, Arg>(ctx, inst->Arg(I))...); + SetDefinition( + ctx, inst, Arg>(ctx, inst->Arg(I))...); } } else { if constexpr (is_first_arg_inst) { - func(ctx, inst, Arg>(ctx, inst->Arg(I))...); + func(ctx, inst, Arg>(ctx, inst->Arg(I))...); } else { - func(ctx, Arg>(ctx, inst->Arg(I))...); + func(ctx, Arg>(ctx, inst->Arg(I))...); } } } @@ -94,14 +97,14 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) { } void EmitInst(EmitContext& ctx, IR::Inst* inst) { - switch (inst->Opcode()) { + switch (inst->GetOpcode()) { #define OPCODE(name, result_type, ...) \ case IR::Opcode::name: \ return Invoke<&Emit##name>(ctx, inst); #include "shader_recompiler/frontend/ir/opcodes.inc" #undef OPCODE } - throw LogicError("Invalid opcode {}", inst->Opcode()); + throw LogicError("Invalid opcode {}", inst->GetOpcode()); } Id TypeId(const EmitContext& ctx, IR::Type type) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index f0f8db8c3..815ca6299 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -43,11 +43,13 @@ public: // LOG_WARNING("Not all arguments in PTP are immediate, STUBBING"); return; } - const IR::Opcode opcode{values[0]->Opcode()}; - if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) { + const IR::Opcode opcode{values[0]->GetOpcode()}; + if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { throw LogicError("Invalid PTP arguments"); } - auto read{[&](int a, int b) { return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); }}; + auto read{[&](unsigned int a, unsigned int b) { + return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); + }}; const Id offsets{ ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)), @@ -297,13 +299,14 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2, Id dref) { - const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } +#ifdef _WIN32 #pragma optimize("", off) +#endif Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index c57bd291d..12a03ed6e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -7,7 +7,7 @@ namespace Shader::Backend::SPIRV { namespace { Id WarpExtract(EmitContext& ctx, Id value) { - const Id shift{ctx.Constant(ctx.U32[1], 5)}; + [[maybe_unused]] const Id shift{ctx.Constant(ctx.U32[1], 5)}; const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); } diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index 17640a622..71601f8fd 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -7,7 +7,7 @@ namespace Shader { -class FileEnvironment final : public Environment { +class FileEnvironment : public Environment { public: explicit FileEnvironment(const char* path); ~FileEnvironment() override; diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 4811242ea..7993e5c43 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -17,7 +17,7 @@ u32 GenericAttributeIndex(Attribute attribute) { if (!IsGeneric(attribute)) { throw InvalidArgument("Attribute is not generic {}", attribute); } - return (static_cast(attribute) - static_cast(Attribute::Generic0X)) / 4; + return (static_cast(attribute) - static_cast(Attribute::Generic0X)) / 4u; } std::string NameOf(Attribute attribute) { @@ -444,4 +444,4 @@ std::string NameOf(Attribute attribute) { return fmt::format("", static_cast(attribute)); } -} // namespace Shader::IR \ No newline at end of file +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index ec029dfd6..e1f0191f4 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -155,7 +155,7 @@ std::string DumpBlock(const Block& block, const std::map& ret += fmt::format(": begin={:04x} end={:04x}\n", block.LocationBegin(), block.LocationEnd()); for (const Inst& inst : block) { - const Opcode op{inst.Opcode()}; + const Opcode op{inst.GetOpcode()}; ret += fmt::format("[{:016x}] ", reinterpret_cast(&inst)); if (TypeOf(op) != Type::Void) { ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp index ec1659e2b..fc18ea2a2 100644 --- a/src/shader_recompiler/frontend/ir/condition.cpp +++ b/src/shader_recompiler/frontend/ir/condition.cpp @@ -12,10 +12,10 @@ namespace Shader::IR { std::string NameOf(Condition condition) { std::string ret; - if (condition.FlowTest() != FlowTest::T) { - ret = fmt::to_string(condition.FlowTest()); + if (condition.GetFlowTest() != FlowTest::T) { + ret = fmt::to_string(condition.GetFlowTest()); } - const auto [pred, negated]{condition.Pred()}; + const auto [pred, negated]{condition.GetPred()}; if (!ret.empty()) { ret += '&'; } diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 51c2f15cf..aa8597c60 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -30,11 +30,11 @@ public: auto operator<=>(const Condition&) const noexcept = default; - [[nodiscard]] IR::FlowTest FlowTest() const noexcept { + [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept { return static_cast(flow_test); } - [[nodiscard]] std::pair Pred() const noexcept { + [[nodiscard]] std::pair GetPred() const noexcept { return {static_cast(pred), pred_negated != 0}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 13eb2de4c..a2104bdb3 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -290,8 +290,8 @@ static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { } U1 IREmitter::Condition(IR::Condition cond) { - const FlowTest flow_test{cond.FlowTest()}; - const auto [pred, is_negated]{cond.Pred()}; + const FlowTest flow_test{cond.GetFlowTest()}; + const auto [pred, is_negated]{cond.GetPred()}; return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 481202d94..ceb44e604 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -12,7 +12,7 @@ namespace Shader::IR { namespace { void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { - if (inst && inst->Opcode() != opcode) { + if (inst && inst->GetOpcode() != opcode) { throw LogicError("Invalid pseudo-instruction"); } } @@ -25,11 +25,17 @@ void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { } void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { - if (inst->Opcode() != expected_opcode) { + if (inst->GetOpcode() != expected_opcode) { throw LogicError("Undoing use of invalid pseudo-op"); } inst = nullptr; } + +void AllocAssociatedInsts(std::unique_ptr& associated_insts) { + if (!associated_insts) { + associated_insts = std::make_unique(); + } +} } // Anonymous namespace Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { @@ -249,12 +255,6 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } -void AllocAssociatedInsts(std::unique_ptr& associated_insts) { - if (!associated_insts) { - associated_insts = std::make_unique(); - } -} - void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 6658dc674..97dc91d85 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -46,7 +46,7 @@ public: } /// Get the opcode this microinstruction represents. - [[nodiscard]] IR::Opcode Opcode() const noexcept { + [[nodiscard]] IR::Opcode GetOpcode() const noexcept { return op; } @@ -95,7 +95,7 @@ public: requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) [[nodiscard]] FlagsType Flags() const noexcept { FlagsType ret; - std::memcpy(&ret, &flags, sizeof(ret)); + std::memcpy(reinterpret_cast(&ret), &flags, sizeof(ret)); return ret; } diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 1cb9db6c9..002dbf94e 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -49,7 +49,7 @@ constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) \ OpcodeMeta{ \ .name{#name_token}, \ - .type{type_token}, \ + .type = type_token, \ .arg_types{__VA_ARGS__}, \ }, #include "opcodes.inc" diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 5f51aeb5f..89a17fb1b 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include #include diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 837c1b487..1e7ffb86d 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -33,11 +33,11 @@ Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} bool Value::IsIdentity() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Identity; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; } bool Value::IsPhi() const noexcept { - return type == Type::Opaque && inst->Opcode() == Opcode::Phi; + return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; } bool Value::IsEmpty() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index b27601e70..a0962863d 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -94,7 +94,7 @@ public: } } - explicit TypedValue(IR::Inst* inst) : TypedValue(Value(inst)) {} + explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; using U1 = TypedValue; diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 847bb1986..cb8ec7eaa 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -34,41 +34,37 @@ struct Compare { }; u32 BranchOffset(Location pc, Instruction inst) { - return pc.Offset() + inst.branch.Offset() + 8; + return pc.Offset() + static_cast(inst.branch.Offset()) + 8u; } void Split(Block* old_block, Block* new_block, Location pc) { if (pc <= old_block->begin || pc >= old_block->end) { throw InvalidArgument("Invalid address to split={}", pc); } - *new_block = Block{ - .begin{pc}, - .end{old_block->end}, - .end_class{old_block->end_class}, - .cond{old_block->cond}, - .stack{old_block->stack}, - .branch_true{old_block->branch_true}, - .branch_false{old_block->branch_false}, - .function_call{old_block->function_call}, - .return_block{old_block->return_block}, - .branch_reg{old_block->branch_reg}, - .branch_offset{old_block->branch_offset}, - .indirect_branches{std::move(old_block->indirect_branches)}, - }; - *old_block = Block{ - .begin{old_block->begin}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{std::move(old_block->stack)}, - .branch_true{new_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + *new_block = Block{}; + new_block->begin = pc; + new_block->end = old_block->end; + new_block->end_class = old_block->end_class, + new_block->cond = old_block->cond; + new_block->stack = old_block->stack; + new_block->branch_true = old_block->branch_true; + new_block->branch_false = old_block->branch_false; + new_block->function_call = old_block->function_call; + new_block->return_block = old_block->return_block; + new_block->branch_reg = old_block->branch_reg; + new_block->branch_offset = old_block->branch_offset; + new_block->indirect_branches = std::move(old_block->indirect_branches); + + const Location old_begin{old_block->begin}; + Stack old_stack{std::move(old_block->stack)}; + *old_block = Block{}; + old_block->begin = old_begin; + old_block->end = pc; + old_block->end_class = EndClass::Branch; + old_block->cond = IR::Condition(true); + old_block->stack = old_stack; + old_block->branch_true = new_block; + old_block->branch_false = nullptr; } Token OpcodeToken(Opcode opcode) { @@ -141,7 +137,7 @@ std::string NameOf(const Block& block) { void Stack::Push(Token token, Location target) { entries.push_back({ - .token{token}, + .token = token, .target{target}, }); } @@ -177,24 +173,17 @@ bool Block::Contains(Location pc) const noexcept { } Function::Function(ObjectPool& block_pool, Location start_address) - : entrypoint{start_address}, labels{{ - .address{start_address}, - .block{block_pool.Create(Block{ - .begin{start_address}, - .end{start_address}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}, - .stack{}, - }} {} + : entrypoint{start_address} { + Label& label{labels.emplace_back()}; + label.address = start_address; + label.block = block_pool.Create(Block{}); + label.block->begin = start_address; + label.block->end = start_address; + label.block->end_class = EndClass::Branch; + label.block->cond = IR::Condition(true); + label.block->branch_true = nullptr; + label.block->branch_false = nullptr; +} CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_address) : env{env_}, block_pool{block_pool_}, program_start{start_address} { @@ -327,7 +316,8 @@ CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Locati // Insert the function into the list if it doesn't exist const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; const bool exists{it != functions.end()}; - const FunctionId call_id{exists ? std::distance(functions.begin(), it) : functions.size()}; + const FunctionId call_id{exists ? static_cast(std::distance(functions.begin(), it)) + : functions.size()}; if (!exists) { functions.emplace_back(block_pool, cal_pc); } @@ -362,20 +352,14 @@ void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, } // Create a virtual block and a conditional block Block* const conditional_block{block_pool.Create()}; - Block virtual_block{ - .begin{block->begin.Virtual()}, - .end{block->begin.Virtual()}, - .end_class{EndClass::Branch}, - .cond{cond}, - .stack{block->stack}, - .branch_true{conditional_block}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - }; + Block virtual_block{}; + virtual_block.begin = block->begin.Virtual(); + virtual_block.end = block->begin.Virtual(); + virtual_block.end_class = EndClass::Branch; + virtual_block.stack = block->stack; + virtual_block.cond = cond; + virtual_block.branch_true = conditional_block; + virtual_block.branch_false = nullptr; // Save the contents of the visited block in the conditional block *conditional_block = std::move(*block); // Impersonate the visited block with a virtual block @@ -444,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += brx_table->branch_offset; + target += static_cast(brx_table->branch_offset); target += 8; targets.push_back(target); } @@ -455,8 +439,8 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, for (const u32 target : targets) { Block* const branch{AddLabel(block, block->stack, target, function_id)}; block->indirect_branches.push_back({ - .block{branch}, - .address{target}, + .block = branch, + .address = target, }); } block->cond = IR::Condition{true}; @@ -523,23 +507,17 @@ Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function if (label_it != function.labels.end()) { return label_it->block; } - Block* const new_block{block_pool.Create(Block{ - .begin{pc}, - .end{pc}, - .end_class{EndClass::Branch}, - .cond{true}, - .stack{stack}, - .branch_true{nullptr}, - .branch_false{nullptr}, - .function_call{}, - .return_block{}, - .branch_reg{}, - .branch_offset{}, - .indirect_branches{}, - })}; + Block* const new_block{block_pool.Create()}; + new_block->begin = pc; + new_block->end = pc; + new_block->end_class = EndClass::Branch; + new_block->cond = IR::Condition(true); + new_block->stack = stack; + new_block->branch_true = nullptr; + new_block->branch_false = nullptr; function.labels.push_back(Label{ .address{pc}, - .block{new_block}, + .block = new_block, .stack{std::move(stack)}, }); return new_block; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index bd85afa1e..932d19c1d 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -45,7 +45,7 @@ constexpr MaskValue MaskValueFromEncoding(const char* encoding) { bit >>= 1; } } - return MaskValue{.mask{mask}, .value{value}}; + return MaskValue{.mask = mask, .value = value}; } struct InstEncoding { @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode{Opcode::name}, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST @@ -116,9 +116,9 @@ constexpr auto MakeFastLookupTableIndex(size_t index) { const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; if ((index & mask) == value) { encodings.at(element) = InstInfo{ - .high_mask{static_cast(encoding.mask_value.mask >> MASK_SHIFT)}, - .high_value{static_cast(encoding.mask_value.value >> MASK_SHIFT)}, - .opcode{encoding.opcode}, + .high_mask = static_cast(encoding.mask_value.mask >> MASK_SHIFT), + .high_value = static_cast(encoding.mask_value.value >> MASK_SHIFT), + .opcode = encoding.opcode, }; ++element; } diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp index 96453509d..008625cb3 100644 --- a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp @@ -97,11 +97,11 @@ std::optional TrackIndirectBranchTable(Environment& env } const u32 imnmx_immediate{static_cast(imnmx.immediate.Value())}; return IndirectBranchTableInfo{ - .cbuf_index{cbuf_index}, - .cbuf_offset{cbuf_offset}, - .num_entries{imnmx_immediate + 1}, - .branch_offset{brx_offset}, - .branch_reg{brx_reg}, + .cbuf_index = cbuf_index, + .cbuf_offset = cbuf_offset, + .num_entries = imnmx_immediate + 1, + .branch_offset = brx_offset, + .branch_reg = brx_reg, }; } diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c804c2a8e..02cef2645 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -558,7 +558,6 @@ private: const Node label{goto_stmt->label}; const u32 label_id{label->id}; const Node label_nested_stmt{FindStatementWithLabel(body, goto_stmt)}; - const auto type{label_nested_stmt->type}; Tree loop_body; loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); @@ -566,7 +565,7 @@ private: Statement* const variable{pool.Create(Variable{}, label_id)}; Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; UpdateTreeUp(loop_stmt); - const Node loop_node{body.insert(goto_stmt, *loop_stmt)}; + body.insert(goto_stmt, *loop_stmt); Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; loop_stmt->children.push_front(*new_goto); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp index ac1433dea..5a1b3a8fc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp @@ -31,9 +31,9 @@ void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dadd.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dadd.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp index ff7321862..723841496 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp @@ -25,9 +25,9 @@ void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& s const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dfma.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dfma.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp index 3e83d1c95..4a49299a0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp @@ -21,9 +21,9 @@ void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; const IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(dmul.fp_rounding)}, - .fmz_mode{IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(dmul.fp_rounding), + .fmz_mode = IR::FmzMode::None, }; v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index b39950c84..b8c89810c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -23,9 +23,9 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; IR::FpControl control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; if (sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index c02a40209..80109ca0e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -19,8 +19,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o } const fcmp{insn}; const IR::F32 zero{v.ir.Imm32(0.0f)}; - const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; - const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)}; const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; const IR::U32 src_reg{v.X(fcmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp index c5417775e..b9f4ee0d9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -29,9 +29,9 @@ void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(fset.pred)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp index 1e366fde0..035f8782a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp @@ -57,9 +57,9 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; if (f2f.src_size != f2f.dst_size) { fp_control.rounding = CastFpRounding(f2f.rounding); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 21ae92be1..cf3cf1ba6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -123,9 +123,9 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; } const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmz_mode}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = fmz_mode, }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { @@ -186,14 +186,14 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { } else if (f2i.dest_format == DestFormat::I64) { handled_special_case = true; result = IR::U64{ - v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; } } if (!handled_special_case && is_signed) { if (bitsize != 64) { result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; } else { - result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)}; + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0UL), result)}; } } @@ -211,6 +211,7 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { void TranslatorVisitor::F2I_reg(u64 insn) { union { + u64 raw; F2I base; BitField<20, 8, IR::Reg> src_reg; } const f2i{insn}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 18561bc9c..fa2a7807b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -24,9 +24,9 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp index 343d91032..8ae437528 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -27,9 +27,9 @@ void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 72f0a18ae..06226b7ce 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -64,9 +64,9 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode } const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{CastFpRounding(fp_rounding)}, - .fmz_mode{CastFmzMode(fmz_mode)}, + .no_contraction = true, + .rounding = CastFpRounding(fp_rounding), + .fmz_mode = CastFmzMode(fmz_mode), }; IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; if (fmz_mode == FmzMode::FMZ && !sat) { @@ -124,4 +124,4 @@ void TranslatorVisitor::FMUL32I(u64 insn) { fmul32i.sat != 0, fmul32i.cc != 0, false); } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp index 8ff9db843..5f93a1513 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -29,9 +29,9 @@ void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const BooleanOp bop{fsetp.bop}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp index e42921a21..7550a8d4c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp @@ -28,9 +28,9 @@ void TranslatorVisitor::FSWZADD(u64 insn) { const IR::U32 swizzle{ir.Imm32(static_cast(fswzadd.swizzle))}; const IR::FpControl fp_control{ - .no_contraction{false}, - .rounding{CastFpRounding(fswzadd.round)}, - .fmz_mode{fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = CastFpRounding(fswzadd.round), + .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 03e7bf047..f2738a93b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -34,9 +34,9 @@ void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; @@ -102,8 +102,9 @@ void TranslatorVisitor::HADD2_imm(u64 insn) { BitField<20, 9, u64> low; } const hadd2{insn}; - const u32 imm{static_cast(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hadd2.low << 6) | static_cast((hadd2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hadd2.high << 22) | static_cast((hadd2.neg_high != 0 ? 1 : 0) << 31)}; HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 8b234bd6a..fd7986701 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -41,9 +41,9 @@ void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; @@ -143,8 +143,9 @@ void TranslatorVisitor::HFMA2_imm(u64 insn) { BitField<57, 2, HalfPrecision> precision; } const hfma2{insn}; - const u32 imm{static_cast(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hfma2.low << 6) | static_cast((hfma2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hfma2.high << 22) | static_cast((hfma2.neg_high != 0 ? 1 : 0) << 31)}; HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index 2451a6ef6..3f548ce76 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -35,9 +35,9 @@ void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl fp_control{ - .no_contraction{true}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{HalfPrecision2FmzMode(precision)}, + .no_contraction = true, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = HalfPrecision2FmzMode(precision), }; IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; @@ -119,8 +119,9 @@ void TranslatorVisitor::HMUL2_imm(u64 insn) { BitField<44, 1, u64> abs_a; } const hmul2{insn}; - const u32 imm{static_cast(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hmul2.low << 6) | static_cast((hmul2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hmul2.high << 22) | static_cast((hmul2.neg_high != 0 ? 1 : 0) << 31)}; HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 7f1f4b88c..cca5b831f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -41,9 +41,9 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hset2.pred)}; @@ -106,8 +106,9 @@ void TranslatorVisitor::HSET2_imm(u64 insn) { BitField<20, 9, u64> low; } const hset2{insn}; - const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{ + static_cast(hset2.low << 6) | static_cast((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hset2.high << 22) | static_cast((hset2.neg_high != 0 ? 1 : 0) << 31)}; HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, Swizzle::H1_H0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp index 3e2a23c92..b3931dae3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -43,9 +43,9 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); const IR::FpControl control{ - .no_contraction{false}, - .rounding{IR::FpRounding::DontCare}, - .fmz_mode{hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + .no_contraction = false, + .rounding = IR::FpRounding::DontCare, + .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), }; IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; @@ -106,8 +106,10 @@ void TranslatorVisitor::HSETP2_imm(u64 insn) { BitField<20, 9, u64> low; } const hsetp2{insn}; - const u32 imm{static_cast(hsetp2.low << 6) | ((hsetp2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hsetp2.high << 22) | ((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{static_cast(hsetp2.low << 6) | + static_cast((hsetp2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hsetp2.high << 22) | + static_cast((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, hsetp2.h_and != 0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 30b570ce4..88bbac0a5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -49,7 +49,7 @@ void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { } const IR::Value result{ir.UnpackUint2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } @@ -63,7 +63,7 @@ void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { } const IR::Value result{ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; i++) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } @@ -156,7 +156,7 @@ IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { const auto [binding, offset_value]{CbufAddr(insn)}; const bool unaligned{cbuf.unaligned != 0}; const u32 offset{offset_value.U32()}; - const IR::Value addr{unaligned ? offset | 4 : (offset & ~7) | 4}; + const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; @@ -200,7 +200,7 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0}; + const u32 sign_bit{static_cast(imm.is_negative != 0 ? (1ULL << 31) : 0)}; const u32 value{static_cast(imm.value) << 12}; return ir.Imm32(Common::BitCast(value | sign_bit)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 1493e1815..8ffd84867 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -68,7 +68,6 @@ void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { } const iadd{insn}; const bool po{iadd.three_for_po == 3}; - const bool neg_a{!po && iadd.neg_a != 0}; if (!po && iadd.neg_b != 0) { op_b = v.ir.INeg(op_b); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index e8b5ae1d2..5a0fc36a0 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -131,7 +131,7 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { } const IR::Value vector{v.ir.UnpackDouble2x32(value)}; for (int i = 0; i < 2; ++i) { - v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, i)}); + v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast(i))}); } break; } @@ -170,4 +170,4 @@ void TranslatorVisitor::I2F_imm(u64 insn) { } } -} // namespace Shader::Maxwell \ No newline at end of file +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp index ae3ecea32..2300088e3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp @@ -50,7 +50,7 @@ void TranslatorVisitor::LDC(u64 insn) { } const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; for (int i = 0; i < 2; ++i) { - X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp index 68963c8ea..e24b49721 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp @@ -40,7 +40,6 @@ std::pair GetSize(u64 insn) { BitField<48, 3, Size> size; } const encoding{insn}; - const Size nnn = encoding.size; switch (encoding.size) { case Size::U8: return {8, false}; @@ -99,7 +98,7 @@ void TranslatorVisitor::LDL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } X(dest, ir.LoadLocal(word_offset)); @@ -123,11 +122,11 @@ void TranslatorVisitor::LDS(u64 insn) { break; case 64: case 128: - if (!IR::IsAligned(dest, bit_size / 32)) { + if (!IR::IsAligned(dest, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned destination register {}", dest); } for (int element = 0; element < bit_size / 32; ++element) { - X(dest + element, IR::U32{ir.CompositeExtract(value, element)}); + X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast(element))}); } break; } @@ -156,7 +155,7 @@ void TranslatorVisitor::STL(u64 insn) { case 32: case 64: case 128: - if (!IR::IsAligned(reg, bit_size / 32)) { + if (!IR::IsAligned(reg, static_cast(bit_size / 32))) { throw NotImplementedException("Unaligned source register"); } ir.WriteLocal(word_offset, src); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 71688b1d7..36c5cff2f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp @@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal64(address)}; for (int i = 0; i < 2; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } @@ -125,7 +125,7 @@ void TranslatorVisitor::LDG(u64 insn) { } const IR::Value vector{ir.LoadGlobal128(address)}; for (int i = 0; i < 4; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast(i))}); } break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index b2da079f9..95d416586 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -199,7 +199,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, if (tex.dc != 0) { value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); } else { - value = IR::F32{v.ir.CompositeExtract(sample, element)}; + value = IR::F32{v.ir.CompositeExtract(sample, static_cast(element))}; } v.F(dest_reg, value); ++dest_reg; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index d5fda20f4..fe2c7db85 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -53,7 +53,7 @@ constexpr std::array RGBA_LUT{ R | G | B | A, // }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp index beab515ad..2ba9c1018 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -37,7 +37,7 @@ union Encoding { BitField<36, 13, u64> cbuf_offset; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 623b8fc23..0863bdfcd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -56,7 +56,7 @@ union Encoding { BitField<53, 4, u64> encoding; }; -void CheckAlignment(IR::Reg reg, int alignment) { +void CheckAlignment(IR::Reg reg, size_t alignment) { if (!IR::IsAligned(reg, alignment)) { throw NotImplementedException("Unaligned source register {}", reg); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp index 8c7e04bca..0459e5473 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp @@ -54,7 +54,7 @@ void Impl(TranslatorVisitor& v, u64 insn, std::optional cbuf_offset) { if (((txq.mask >> element) & 1) == 0) { continue; } - v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, element)}); + v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast(element))}); ++dest_reg; } } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp index af13b3fcc..ec5e74f6d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp @@ -69,7 +69,6 @@ void TranslatorVisitor::VSETP(u64 insn) { const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast(vsetp.src_b_imm)) : GetReg20(insn)}; const u32 a_selector{static_cast(vsetp.src_a_selector)}; - const u32 b_selector{is_b_imm ? 0U : static_cast(vsetp.src_b_selector)}; const VideoWidth a_width{vsetp.src_a_width}; const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 1c03ee82a..edbfcd308 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { @@ -22,8 +23,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { auto& cbufs{info.constant_buffer_descriptors}; cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), ConstantBufferDescriptor{ - .index{index}, - .count{1}, + .index = index, + .count = 1, }); } @@ -91,7 +92,7 @@ void SetAttribute(Info& info, IR::Attribute attribute) { } void VisitUsages(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: case IR::Opcode::CompositeConstructF16x3: case IR::Opcode::CompositeConstructF16x4: @@ -209,7 +210,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: case IR::Opcode::UndefU8: @@ -236,7 +237,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU16: case IR::Opcode::GetCbufS16: case IR::Opcode::UndefU16: @@ -271,7 +272,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::UndefU64: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS8: @@ -314,7 +315,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { default: break; } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::DemoteToHelperInvocation: info.uses_demote_to_helper_invocation = true; break; @@ -361,7 +362,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } else { throw NotImplementedException("Constant buffer with non-immediate index"); } - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: info.used_constant_buffer_types |= IR::Type::U8; @@ -443,7 +444,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { } void VisitFpModifiers(Info& info, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::FPAdd16: case IR::Opcode::FPFma16: case IR::Opcode::FPMul16: @@ -540,7 +541,6 @@ void GatherInfoFromHeader(Environment& env, Info& info) { info.stores_position |= header.vtg.omap_systemb.position != 0; } } - } // Anonymous namespace void CollectShaderInfoPass(Environment& env, IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 1720d7a09..61fbbe04c 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -58,7 +58,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (is_lhs_immediate && !is_rhs_immediate) { IR::Inst* const rhs_inst{rhs.InstRecursive()}; - if (rhs_inst->Opcode() == inst.Opcode() && rhs_inst->Arg(1).IsImmediate()) { + if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg(lhs), Arg(rhs_inst->Arg(1)))}; inst.SetArg(0, rhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -70,7 +70,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { } if (!is_lhs_immediate && is_rhs_immediate) { const IR::Inst* const lhs_inst{lhs.InstRecursive()}; - if (lhs_inst->Opcode() == inst.Opcode() && lhs_inst->Arg(1).IsImmediate()) { + if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) { const auto combined{imm_fn(Arg(rhs), Arg(lhs_inst->Arg(1)))}; inst.SetArg(0, lhs_inst->Arg(0)); inst.SetArg(1, IR::Value{combined}); @@ -123,7 +123,8 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { return false; } IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; - if (lhs_shl->Opcode() != IR::Opcode::ShiftLeftLogical32 || lhs_shl->Arg(1) != IR::Value{16U}) { + if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || + lhs_shl->Arg(1) != IR::Value{16U}) { return false; } if (lhs_shl->Arg(0).IsImmediate()) { @@ -131,7 +132,7 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; - if (lhs_mul->Opcode() != IR::Opcode::IMul32 || rhs_mul->Opcode() != IR::Opcode::IMul32) { + if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) { return false; } if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { @@ -143,10 +144,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { } IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; - if (lhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } - if (rhs_bfe->Opcode() != IR::Opcode::BitFieldUExtract) { + if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { return false; } if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { @@ -194,8 +195,9 @@ void FoldISub32(IR::Inst& inst) { // ISub32 is generally used to subtract two constant buffers, compare and replace this with // zero if they equal. const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { - return a->Opcode() == IR::Opcode::GetCbufU32 && b->Opcode() == IR::Opcode::GetCbufU32 && - a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); + return a->GetOpcode() == IR::Opcode::GetCbufU32 && + b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && + a->Arg(1) == b->Arg(1); }}; IR::Inst* op_a{inst.Arg(0).InstRecursive()}; IR::Inst* op_b{inst.Arg(1).InstRecursive()}; @@ -204,15 +206,15 @@ void FoldISub32(IR::Inst& inst) { return; } // It's also possible a value is being added to a cbuf and then subtracted - if (op_b->Opcode() == IR::Opcode::IAdd32) { + if (op_b->GetOpcode() == IR::Opcode::IAdd32) { // Canonicalize local variables to simplify the following logic std::swap(op_a, op_b); } - if (op_b->Opcode() != IR::Opcode::GetCbufU32) { + if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) { return; } IR::Inst* const inst_cbuf{op_b}; - if (op_a->Opcode() != IR::Opcode::IAdd32) { + if (op_a->GetOpcode() != IR::Opcode::IAdd32) { return; } IR::Value add_op_a{op_a->Arg(0)}; @@ -250,7 +252,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const lhs_op{lhs_value.InstRecursive()}; IR::Inst* const rhs_op{rhs_value.InstRecursive()}; - if (lhs_op->Opcode() != IR::Opcode::FPMul32 || rhs_op->Opcode() != IR::Opcode::FPRecip32) { + if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || + rhs_op->GetOpcode() != IR::Opcode::FPRecip32) { return; } const IR::Value recip_source{rhs_op->Arg(0)}; @@ -260,8 +263,8 @@ void FoldFPMul32(IR::Inst& inst) { } IR::Inst* const attr_a{recip_source.InstRecursive()}; IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; - if (attr_a->Opcode() != IR::Opcode::GetAttribute || - attr_b->Opcode() != IR::Opcode::GetAttribute) { + if (attr_a->GetOpcode() != IR::Opcode::GetAttribute || + attr_b->GetOpcode() != IR::Opcode::GetAttribute) { return; } if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { @@ -304,7 +307,7 @@ void FoldLogicalNot(IR::Inst& inst) { return; } IR::Inst* const arg{value.InstRecursive()}; - if (arg->Opcode() == IR::Opcode::LogicalNot) { + if (arg->GetOpcode() == IR::Opcode::LogicalNot) { inst.ReplaceUsesWith(arg->Arg(0)); } } @@ -317,12 +320,12 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } if constexpr (op == IR::Opcode::BitCastF32U32) { - if (arg_inst->Opcode() == IR::Opcode::GetCbufU32) { + if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) { // Replace the bitcast with a typed constant buffer read inst.ReplaceOpcode(IR::Opcode::GetCbufF32); inst.SetArg(0, arg_inst->Arg(0)); @@ -338,7 +341,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { return; } IR::Inst* const arg_inst{value.InstRecursive()}; - if (arg_inst->Opcode() == reverse) { + if (arg_inst->GetOpcode() == reverse) { inst.ReplaceUsesWith(arg_inst->Arg(0)); return; } @@ -347,7 +350,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { template IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence) { using Traits = LambdaTraits; - return IR::Value{func(Arg>(inst.Arg(I))...)}; + return IR::Value{func(Arg>(inst.Arg(I))...)}; } void FoldBranchConditional(IR::Inst& inst) { @@ -357,7 +360,7 @@ void FoldBranchConditional(IR::Inst& inst) { return; } const IR::Inst* cond_inst{cond.InstRecursive()}; - if (cond_inst->Opcode() == IR::Opcode::LogicalNot) { + if (cond_inst->GetOpcode() == IR::Opcode::LogicalNot) { const IR::Value true_label{inst.Arg(1)}; const IR::Value false_label{inst.Arg(2)}; // Remove negation on the conditional (take the parameter out of LogicalNot) and swap @@ -371,10 +374,10 @@ void FoldBranchConditional(IR::Inst& inst) { std::optional FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, IR::Opcode construct, u32 first_index) { IR::Inst* const inst{inst_value.InstRecursive()}; - if (inst->Opcode() == construct) { + if (inst->GetOpcode() == construct) { return inst->Arg(first_index); } - if (inst->Opcode() != insert) { + if (inst->GetOpcode() != insert) { return std::nullopt; } IR::Value value_index{inst->Arg(2)}; @@ -410,7 +413,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser } void ConstantPropagation(IR::Block& block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::GetRegister: return FoldGetRegister(inst); case IR::Opcode::GetPred: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 0858a0bdd..90a65dd16 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -57,7 +57,7 @@ struct StorageInfo { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -80,7 +80,7 @@ bool IsGlobalMemory(const IR::Inst& inst) { /// Returns true when the instruction is a global memory instruction bool IsGlobalMemoryWrite(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: case IR::Opcode::WriteGlobalS16: @@ -140,7 +140,7 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value zero{u32{0}}; - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -164,7 +164,7 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode()); } } @@ -184,7 +184,7 @@ std::optional TrackLowAddress(IR::Inst* inst) { // This address is expected to either be a PackUint2x32 or a IAdd64 IR::Inst* addr_inst{addr.InstRecursive()}; s32 imm_offset{0}; - if (addr_inst->Opcode() == IR::Opcode::IAdd64) { + if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { // If it's an IAdd64, get the immediate offset it is applying and grab the address // instruction. This expects for the instruction to be canonicalized having the address on // the first argument and the immediate offset on the second one. @@ -200,7 +200,7 @@ std::optional TrackLowAddress(IR::Inst* inst) { addr_inst = iadd_addr.Inst(); } // With IAdd64 handled, now PackUint2x32 is expected without exceptions - if (addr_inst->Opcode() != IR::Opcode::PackUint2x32) { + if (addr_inst->GetOpcode() != IR::Opcode::PackUint2x32) { return std::nullopt; } // PackUint2x32 is expected to be generated from a vector @@ -210,20 +210,20 @@ std::optional TrackLowAddress(IR::Inst* inst) { } // This vector is expected to be a CompositeConstructU32x2 IR::Inst* const vector_inst{vector.InstRecursive()}; - if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) { + if (vector_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { return std::nullopt; } // Grab the first argument from the CompositeConstructU32x2, this is the low address. return LowAddrInfo{ .value{IR::U32{vector_inst->Arg(0)}}, - .imm_offset{imm_offset}, + .imm_offset = imm_offset, }; } /// Tries to track the storage buffer address used by a global memory instruction std::optional Track(const IR::Value& value, const Bias* bias) { const auto pred{[bias](const IR::Inst* inst) -> std::optional { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -256,9 +256,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ - .index{0}, - .offset_begin{0x110}, - .offset_end{0x610}, + .index = 0, + .offset_begin = 0x110, + .offset_end = 0x610, }; // Track the low address of the instruction const std::optional low_addr_info{TrackLowAddress(&inst)}; @@ -286,8 +286,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) info.set.insert(*storage_buffer); info.to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, - .inst{&inst}, - .block{&block}, + .inst = &inst, + .block = &block, }); } @@ -312,7 +312,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer /// Replace a global memory load instruction with its storage buffer equivalent void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; inst.ReplaceUsesWith(value); @@ -321,7 +321,7 @@ void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, /// Replace a global memory write instruction with its storage buffer equivalent void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); inst.Invalidate(); @@ -330,7 +330,7 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index /// Replace a global memory instruction with its storage buffer equivalent void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -348,7 +348,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); default: - throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); } } } // Anonymous namespace @@ -366,9 +366,9 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { u32 storage_index{}; for (const StorageBufferAddr& storage_buffer : info.set) { program.info.storage_buffers_descriptors.push_back({ - .cbuf_index{storage_buffer.index}, - .cbuf_offset{storage_buffer.offset}, - .count{1}, + .cbuf_index = storage_buffer.index, + .cbuf_offset = storage_buffer.offset, + .count = 1, .is_written{info.writes.contains(storage_buffer)}, }); ++storage_index; diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 8790b48f2..38af72dfe 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -22,7 +22,8 @@ void IdentityRemovalPass(IR::Program& program) { inst->SetArg(i, arg.Inst()->Arg(0)); } } - if (inst->Opcode() == IR::Opcode::Identity || inst->Opcode() == IR::Opcode::Void) { + if (inst->GetOpcode() == IR::Opcode::Identity || + inst->GetOpcode() == IR::Opcode::Void) { to_invalidate.push_back(&*inst); inst = block->Instructions().erase(inst); } else { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 0d2c91ed6..52576b07f 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -123,7 +123,7 @@ IR::Opcode Replace(IR::Opcode op) { void LowerFp16ToFp32(IR::Program& program) { for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { - inst.ReplaceOpcode(Replace(inst.Opcode())); + inst.ReplaceOpcode(Replace(inst.GetOpcode())); } } } diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index ca36253d1..346fcc377 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -116,7 +116,7 @@ IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { } [[nodiscard]] bool IsPhi(const IR::Inst& inst) noexcept { - return inst.Opcode() == IR::Opcode::Phi; + return inst.GetOpcode() == IR::Opcode::Phi; } enum class Status { @@ -278,7 +278,7 @@ private: }; void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::SetRegister: if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { pass.WriteVariable(reg, block, inst.Arg(1)); diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 290ce4179..c8aee3d3d 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -30,7 +30,7 @@ struct TextureInst { using TextureInstVector = boost::container::small_vector; IR::Opcode IndexedInstruction(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BoundImageSampleImplicitLod: return IR::Opcode::ImageSampleImplicitLod; @@ -67,7 +67,7 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { } bool IsBindless(const IR::Inst& inst) { - switch (inst.Opcode()) { + switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: case IR::Opcode::BindlessImageSampleExplicitLod: case IR::Opcode::BindlessImageSampleDrefImplicitLod: @@ -91,7 +91,7 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageGradient: return false; default: - throw InvalidArgument("Invalid opcode {}", inst.Opcode()); + throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); } } @@ -100,7 +100,7 @@ bool IsTextureInstruction(const IR::Inst& inst) { } std::optional TryGetConstBuffer(const IR::Inst* inst) { - if (inst->Opcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; @@ -134,14 +134,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { addr = *track_addr; } else { addr = ConstBufferAddr{ - .index{env.TextureBoundBuffer()}, - .offset{inst.Arg(0).U32()}, + .index = env.TextureBoundBuffer(), + .offset = inst.Arg(0).U32(), }; } return TextureInst{ .cbuf{addr}, - .inst{&inst}, - .block{block}, + .inst = &inst, + .block = block, }; } @@ -211,7 +211,7 @@ void TexturePass(Environment& env, IR::Program& program) { const auto& cbuf{texture_inst.cbuf}; auto flags{inst->Flags()}; - switch (inst->Opcode()) { + switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); inst->SetFlags(flags); @@ -235,16 +235,16 @@ void TexturePass(Environment& env, IR::Program& program) { u32 index; if (flags.type == TextureType::Buffer) { index = descriptors.Add(TextureBufferDescriptor{ - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } else { index = descriptors.Add(TextureDescriptor{ - .type{flags.type}, - .cbuf_index{cbuf.index}, - .cbuf_offset{cbuf.offset}, - .count{1}, + .type = flags.type, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, }); } inst->SetArg(0, IR::Value{index}); diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index 4080b37cc..dbec96d84 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -14,14 +14,14 @@ namespace Shader::Optimization { static void ValidateTypes(const IR::Program& program) { for (const auto& block : program.blocks) { for (const IR::Inst& inst : *block) { - if (inst.Opcode() == IR::Opcode::Phi) { + if (inst.GetOpcode() == IR::Opcode::Phi) { // Skip validation on phi nodes continue; } const size_t num_args{inst.NumArgs()}; for (size_t i = 0; i < num_args; ++i) { const IR::Type t1{inst.Arg(i).Type()}; - const IR::Type t2{IR::ArgTypeOf(inst.Opcode(), i)}; + const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)}; if (!IR::AreTypesCompatible(t1, t2)) { throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); } diff --git a/src/tests/common/unique_function.cpp b/src/tests/common/unique_function.cpp index ac9912738..aa6e86593 100644 --- a/src/tests/common/unique_function.cpp +++ b/src/tests/common/unique_function.cpp @@ -17,10 +17,12 @@ struct Noisy { Noisy& operator=(Noisy&& rhs) noexcept { state = "Move assigned"; rhs.state = "Moved away"; + return *this; } Noisy(const Noisy&) : state{"Copied constructed"} {} Noisy& operator=(const Noisy&) { state = "Copied assigned"; + return *this; } std::string state; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 71b07c194..3166a69dc 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -203,7 +203,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) +target_link_libraries(video_core PUBLIC glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 893258b4a..57e2d569c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -447,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .dynamicStateCount = static_cast(dynamic_states.size()), .pDynamicStates = dynamic_states.data(), }; - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, .requiredSubgroupSize = GuestWarpSize, @@ -457,15 +457,16 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!spv_modules[stage]) { continue; } - [[maybe_unused]] auto& stage_ci = shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast(stage)), - .module = *spv_modules[stage], - .pName = "main", - .pSpecializationInfo = nullptr, - }); + [[maybe_unused]] auto& stage_ci = + shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .module = *spv_modules[stage], + .pName = "main", + .pSpecializationInfo = nullptr, + }); /* if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { stage_ci.pNext = &subgroup_size_ci; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 23bf84a92..fcebb8f6e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,7 +47,7 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -u64 MakeCbufKey(u32 index, u32 offset) { +static u64 MakeCbufKey(u32 index, u32 offset) { return (static_cast(index) << 32) | offset; } @@ -638,6 +638,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, + .fixed_state_point_size{}, }; } @@ -748,7 +749,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Environment& env{*envs[env_index]}; ++env_index; - const u32 cfg_offset{env.StartAddress() + sizeof(Shader::ProgramHeader)}; + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); } diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index b2dcd74ab..991afe521 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include #include diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index e42b091c5..70328680d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -279,7 +279,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { }; } -[[nodiscard]] std::vector TransformBufferCopies( +[[maybe_unused]] [[nodiscard]] std::vector TransformBufferCopies( std::span copies, size_t buffer_offset) { std::vector result(copies.size()); std::ranges::transform( -- cgit v1.2.3 From 5bfcafa0a21619e8cd82c38ec51e260838f42042 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 10 Apr 2021 02:32:55 -0400 Subject: shader: Address feedback + clang format --- src/shader_recompiler/CMakeLists.txt | 2 ++ src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 4 ---- src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | 1 - src/shader_recompiler/frontend/ir/opcodes.cpp | 2 +- src/shader_recompiler/frontend/maxwell/control_flow.cpp | 4 ++-- src/shader_recompiler/frontend/maxwell/control_flow.h | 16 ++++++++-------- src/shader_recompiler/frontend/maxwell/decode.cpp | 2 +- .../frontend/maxwell/translate/impl/common_funcs.cpp | 5 +++-- .../frontend/maxwell/translate/impl/not_implemented.cpp | 1 - .../ir_opt/global_memory_to_storage_buffer_pass.cpp | 3 ++- src/shader_recompiler/object_pool.h | 2 +- src/video_core/renderer_vulkan/vk_compute_pipeline.h | 4 ++-- 12 files changed, 22 insertions(+), 24 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 551bf1c58..6b5df23e2 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -197,6 +197,8 @@ else() $<$:-Werror=unused-but-set-variable> -Werror=unused-variable + # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6. + # And this in turns limits the size of a std::array. $<$:-fbracket-depth=1024> ) endif() diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 815ca6299..6a89c0f79 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -304,10 +304,6 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); } -#ifdef _WIN32 -#pragma optimize("", off) -#endif - Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, Id lod, Id ms) { const auto info{inst->Flags()}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 12a03ed6e..f6196653a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -7,7 +7,6 @@ namespace Shader::Backend::SPIRV { namespace { Id WarpExtract(EmitContext& ctx, Id value) { - [[maybe_unused]] const Id shift{ctx.Constant(ctx.U32[1], 5)}; const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); } diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 002dbf94e..7d3e0b2ab 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -49,7 +49,7 @@ constexpr std::array META_TABLE{ #define OPCODE(name_token, type_token, ...) \ OpcodeMeta{ \ .name{#name_token}, \ - .type = type_token, \ + .type = type_token, \ .arg_types{__VA_ARGS__}, \ }, #include "opcodes.inc" diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index cb8ec7eaa..9811183f1 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -44,7 +44,7 @@ void Split(Block* old_block, Block* new_block, Location pc) { *new_block = Block{}; new_block->begin = pc; new_block->end = old_block->end; - new_block->end_class = old_block->end_class, + new_block->end_class = old_block->end_class; new_block->cond = old_block->cond; new_block->stack = old_block->stack; new_block->branch_true = old_block->branch_true; @@ -428,7 +428,7 @@ CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, if (!is_absolute) { target += pc.Offset(); } - target += static_cast(brx_table->branch_offset); + target += static_cast(brx_table->branch_offset); target += 8; targets.push_back(target); } diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 9f570fbb5..89966b16a 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -78,15 +78,15 @@ struct Block : boost::intrusive::set_base_hook< Location begin; Location end; - EndClass end_class; - IR::Condition cond; + EndClass end_class{}; + IR::Condition cond{}; Stack stack; - Block* branch_true; - Block* branch_false; - FunctionId function_call; - Block* return_block; - IR::Reg branch_reg; - s32 branch_offset; + Block* branch_true{}; + Block* branch_false{}; + FunctionId function_call{}; + Block* return_block{}; + IR::Reg branch_reg{}; + s32 branch_offset{}; std::vector indirect_branches; }; diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp index 932d19c1d..972f677dc 100644 --- a/src/shader_recompiler/frontend/maxwell/decode.cpp +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp @@ -56,7 +56,7 @@ constexpr std::array UNORDERED_ENCODINGS{ #define INST(name, cute, encode) \ InstEncoding{ \ .mask_value{MaskValueFromEncoding(encode)}, \ - .opcode = Opcode::name, \ + .opcode = Opcode::name, \ }, #include "maxwell.inc" #undef INST diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index d30e82b10..10bb01d99 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -72,8 +72,9 @@ bool IsCompareOpOrdered(FPCompareOp op) { } } -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, const IR::F16F32F64& operand_2, - FPCompareOp compare_op, IR::FpControl control) { +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, + IR::FpControl control) { const bool ordered{IsCompareOpOrdered(compare_op)}; switch (compare_op) { case FPCompareOp::F: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index ba0cfa673..c23901052 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -65,7 +65,6 @@ void TranslatorVisitor::CS2R(u64) { ThrowNotImplemented(Opcode::CS2R); } - void TranslatorVisitor::FCHK_reg(u64) { ThrowNotImplemented(Opcode::FCHK_reg); } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 90a65dd16..afe871505 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -164,7 +164,8 @@ void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", inst.GetOpcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", + inst.GetOpcode()); } } diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index 424281634..f8b255b66 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h @@ -18,7 +18,7 @@ public: } template - requires std::is_constructible_v [[nodiscard]] T* Create(Args&&... args) { + requires std::is_constructible_v[[nodiscard]] T* Create(Args&&... args) { return std::construct_at(Memory(), std::forward(args)...); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 104e6cc85..8efdc2926 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,9 +4,9 @@ #pragma once -#include -#include #include +#include +#include #include "common/common_types.h" #include "common/thread_worker.h" -- cgit v1.2.3 From 7cb2ab358517d95ebcd35c94c72b9e91762906c3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Apr 2021 01:45:39 -0300 Subject: shader: Implement SULD and SUST --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 97 +++++-- src/shader_recompiler/backend/spirv/emit_context.h | 9 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 6 + .../backend/spirv/emit_spirv_image.cpp | 46 +++- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 11 + src/shader_recompiler/frontend/ir/ir_emitter.h | 7 +- src/shader_recompiler/frontend/ir/modifiers.h | 12 +- src/shader_recompiler/frontend/ir/opcodes.inc | 6 + .../maxwell/translate/impl/not_implemented.cpp | 8 - .../maxwell/translate/impl/surface_load_store.cpp | 280 +++++++++++++++++++++ .../maxwell/translate/impl/texture_fetch.cpp | 19 +- .../translate/impl/texture_fetch_swizzled.cpp | 12 +- .../maxwell/translate/impl/texture_gather.cpp | 19 +- .../translate/impl/texture_gather_swizzled.cpp | 3 +- .../maxwell/translate/impl/texture_gradient.cpp | 18 +- .../maxwell/translate/impl/texture_load.cpp | 18 +- .../translate/impl/texture_load_swizzled.cpp | 2 +- .../translate/impl/texture_mipmap_level.cpp | 18 +- .../ir_opt/collect_shader_info_pass.cpp | 3 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 91 +++++-- src/shader_recompiler/shader_info.h | 47 ++-- src/video_core/renderer_vulkan/blit_image.cpp | 4 +- src/video_core/renderer_vulkan/pipeline_helper.h | 43 +--- .../renderer_vulkan/vk_compute_pipeline.cpp | 4 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 + src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- .../renderer_vulkan/vk_texture_cache.cpp | 112 +++++++-- src/video_core/renderer_vulkan/vk_texture_cache.h | 23 +- src/video_core/texture_cache/texture_cache.h | 8 + 31 files changed, 732 insertions(+), 202 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6b5df23e2..8e1d37373 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -133,6 +133,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp + frontend/maxwell/translate/impl/surface_load_store.cpp frontend/maxwell/translate/impl/texture_fetch.cpp frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp frontend/maxwell/translate/impl/texture_gather_swizzled.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 0c114402b..32f8c4508 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -18,41 +18,70 @@ namespace { Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; const Id type{ctx.F32[1]}; + const bool depth{desc.is_depth}; switch (desc.type) { case TextureType::Color1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); case TextureType::ColorArray1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); case TextureType::Color2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format); case TextureType::ColorArray2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format); case TextureType::Color3D: - return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format); case TextureType::ColorCube: - return ctx.TypeImage(type, spv::Dim::Cube, false, false, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format); case TextureType::ColorArrayCube: - return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format); - case TextureType::Shadow1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, true, false, false, 1, format); - case TextureType::ShadowArray1D: - return ctx.TypeImage(type, spv::Dim::Dim1D, true, true, false, 1, format); - case TextureType::Shadow2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, true, false, false, 1, format); - case TextureType::ShadowArray2D: - return ctx.TypeImage(type, spv::Dim::Dim2D, true, true, false, 1, format); - case TextureType::Shadow3D: - return ctx.TypeImage(type, spv::Dim::Dim3D, true, false, false, 1, format); - case TextureType::ShadowCube: - return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format); - case TextureType::ShadowArrayCube: - return ctx.TypeImage(type, spv::Dim::Cube, true, true, false, 1, format); + return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format); case TextureType::Buffer: break; } throw InvalidArgument("Invalid texture type {}", desc.type); } +Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { + const spv::ImageFormat format{[&] { + switch (desc.format) { + case ImageFormat::Typeless: + return spv::ImageFormat::Unknown; + case ImageFormat::R8_UINT: + return spv::ImageFormat::R8ui; + case ImageFormat::R8_SINT: + return spv::ImageFormat::R8i; + case ImageFormat::R16_UINT: + return spv::ImageFormat::R16ui; + case ImageFormat::R16_SINT: + return spv::ImageFormat::R16i; + case ImageFormat::R32_UINT: + return spv::ImageFormat::R32ui; + case ImageFormat::R32G32_UINT: + return spv::ImageFormat::Rg32ui; + case ImageFormat::R32G32B32A32_UINT: + return spv::ImageFormat::Rgba32ui; + } + throw InvalidArgument("Invalid image format {}", desc.format); + }()}; + const Id type{ctx.U32[1]}; + switch (desc.type) { + case TextureType::Color1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format); + case TextureType::ColorArray1D: + return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format); + case TextureType::Color2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format); + case TextureType::ColorArray2D: + return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format); + case TextureType::Color3D: + return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format); + case TextureType::Buffer: + throw NotImplementedException("Image buffer"); + default: + break; + } + throw InvalidArgument("Invalid texture type {}", desc.type); +} + Id DefineVariable(EmitContext& ctx, Id type, std::optional builtin, spv::StorageClass storage_class) { const Id pointer_type{ctx.TypePointer(storage_class, type)}; @@ -134,6 +163,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineStorageBuffers(program.info, binding); DefineTextureBuffers(program.info, binding); DefineTextures(program.info, binding); + DefineImages(program.info, binding); DefineAttributeMemAccess(program.info); DefineLabels(program); } @@ -572,6 +602,31 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { } } +void EmitContext::DefineImages(const Info& info, u32& binding) { + images.reserve(info.image_descriptors.size()); + for (const ImageDescriptor& desc : info.image_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of textures"); + } + const Id image_type{ImageType(*this, desc)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("img{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + for (u32 index = 0; index < desc.count; ++index) { + images.push_back(ImageDefinition{ + .id{id}, + .image_type{image_type}, + }); + } + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + binding += desc.count; + } +} + void EmitContext::DefineLabels(IR::Program& program) { for (IR::Block* const block : program.blocks) { block->SetDefinition(OpLabel()); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index f1ac4430c..e70f3458c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -35,6 +35,11 @@ struct TextureDefinition { Id image_type; }; +struct ImageDefinition { + Id id; + Id image_type; +}; + struct UniformDefinitions { Id U8{}; Id S8{}; @@ -95,8 +100,9 @@ public: std::array cbufs{}; std::array ssbos{}; - std::vector textures; std::vector texture_buffers; + std::vector textures; + std::vector images; Id workgroup_id{}; Id local_invocation_id{}; @@ -156,6 +162,7 @@ private: void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextureBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); + void DefineImages(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); void DefineLabels(IR::Program& program); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 355cf0ca8..ecd0fac5c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -253,6 +253,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::ImageGatherExtended); ctx.AddCapability(spv::Capability::ImageQuery); ctx.AddCapability(spv::Capability::SampledBuffer); + ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); } Id PhiArgDef(EmitContext& ctx, IR::Inst* inst, size_t index) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 08460c94e..a39b16f1e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -369,6 +369,8 @@ Id EmitBindlessImageFetch(EmitContext&); Id EmitBindlessImageQueryDimensions(EmitContext&); Id EmitBindlessImageQueryLod(EmitContext&); Id EmitBindlessImageGradient(EmitContext&); +Id EmitBindlessImageRead(EmitContext&); +Id EmitBindlessImageWrite(EmitContext&); Id EmitBoundImageSampleImplicitLod(EmitContext&); Id EmitBoundImageSampleExplicitLod(EmitContext&); Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); @@ -379,6 +381,8 @@ Id EmitBoundImageFetch(EmitContext&); Id EmitBoundImageQueryDimensions(EmitContext&); Id EmitBoundImageQueryLod(EmitContext&); Id EmitBoundImageGradient(EmitContext&); +Id EmitBoundImageRead(EmitContext&); +Id EmitBoundImageWrite(EmitContext&); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset); Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -397,6 +401,8 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivates, Id offset, Id lod_clamp); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); Id EmitVoteAll(EmitContext& ctx, Id pred); Id EmitVoteAny(EmitContext& ctx, Id pred); Id EmitVoteEqual(EmitContext& ctx, Id pred); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 6a89c0f79..dd261fd47 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -144,6 +144,18 @@ Id TextureImage(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo in } } +Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { + if (!index.IsImmediate()) { + throw NotImplementedException("Indirect image indexing"); + } + if (info.type == TextureType::Buffer) { + throw NotImplementedException("Image buffer"); + } else { + const ImageDefinition def{ctx.images.at(index.U32())}; + return ctx.OpLoad(def.image_type, def.id); + } +} + Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) { const auto info{inst->Flags()}; if (info.relaxed_precision != 0) { @@ -209,6 +221,14 @@ Id EmitBindlessImageGradient(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBindlessImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBindlessImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitBoundImageSampleImplicitLod(EmitContext&) { throw LogicError("Unreachable instruction"); } @@ -249,6 +269,14 @@ Id EmitBoundImageGradient(EmitContext&) { throw LogicError("Unreachable instruction"); } +Id EmitBoundImageRead(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +Id EmitBoundImageWrite(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id bias_lc, Id offset) { const auto info{inst->Flags()}; @@ -322,23 +350,16 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; switch (info.type) { case TextureType::Color1D: - case TextureType::Shadow1D: return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), zero, zero, mips()); case TextureType::ColorArray1D: case TextureType::Color2D: case TextureType::ColorCube: - case TextureType::ShadowArray1D: - case TextureType::Shadow2D: - case TextureType::ShadowCube: return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod), zero, mips()); case TextureType::ColorArray2D: case TextureType::Color3D: case TextureType::ColorArrayCube: - case TextureType::ShadowArray2D: - case TextureType::Shadow3D: - case TextureType::ShadowArrayCube: return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), mips()); case TextureType::Buffer: @@ -365,4 +386,15 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I coords, operands.Mask(), operands.Span()); } +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + const auto info{inst->Flags()}; + return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], + Image(ctx, index, info), coords, std::nullopt, std::span{}); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) { + const auto info{inst->Flags()}; + ctx.OpImageWrite(Image(ctx, index, info), coords, color); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index a2104bdb3..17be0c639 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1620,6 +1620,17 @@ Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const V return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp); } +Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead}; + return Inst(op, Flags{info}, handle, coords); +} + +void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, + TextureInstInfo info) { + const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; + Inst(op, Flags{info}, handle, coords, color); +} + U1 IREmitter::VoteAll(const U1& value) { return Inst(Opcode::VoteAll, value); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 2cab1dc5d..ec60070ef 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -265,20 +265,19 @@ public: [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info); - [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, TextureInstInfo info); - [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, const Value& offset2, const F32& dref, TextureInstInfo info); - [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, const U32& lod, const U32& multisampling, TextureInstInfo info); - [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, const Value& derivates, const Value& offset, const F32& lod_clamp, TextureInstInfo info); + [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info); + [[nodiscard]] void ImageWrite(const Value& handle, const Value& coords, const Value& color, + TextureInstInfo info); [[nodiscard]] U1 VoteAll(const U1& value); [[nodiscard]] U1 VoteAny(const U1& value); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 461671326..447e9703c 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -43,11 +43,13 @@ static_assert(sizeof(FpControl) <= sizeof(u32)); union TextureInstInfo { u32 raw; BitField<0, 8, TextureType> type; - BitField<8, 1, u32> has_bias; - BitField<9, 1, u32> has_lod_clamp; - BitField<10, 1, u32> relaxed_precision; - BitField<11, 2, u32> gather_component; - BitField<13, 2, u32> num_derivates; + BitField<8, 1, u32> is_depth; + BitField<9, 1, u32> has_bias; + BitField<10, 1, u32> has_lod_clamp; + BitField<11, 1, u32> relaxed_precision; + BitField<12, 2, u32> gather_component; + BitField<14, 2, u32> num_derivates; + BitField<16, 3, ImageFormat> image_format; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 1697de965..82c5b37ba 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -389,6 +389,8 @@ OPCODE(BindlessImageFetch, F32x4, U32, OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) +OPCODE(BindlessImageWrite, Void, U32, Opaque, U32x4, ) OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -400,6 +402,8 @@ OPCODE(BoundImageFetch, F32x4, U32, OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(BoundImageRead, U32x4, U32, Opaque, ) +OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, ) OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) @@ -411,6 +415,8 @@ OPCODE(ImageFetch, F32x4, U32, OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageRead, U32x4, U32, Opaque, ) +OPCODE(ImageWrite, Void, U32, Opaque, U32x4, ) // Warp operations OPCODE(VoteAll, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c23901052..327941223 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -281,18 +281,10 @@ void TranslatorVisitor::SUATOM_cas(u64) { ThrowNotImplemented(Opcode::SUATOM_cas); } -void TranslatorVisitor::SULD(u64) { - ThrowNotImplemented(Opcode::SULD); -} - void TranslatorVisitor::SURED(u64) { ThrowNotImplemented(Opcode::SURED); } -void TranslatorVisitor::SUST(u64) { - ThrowNotImplemented(Opcode::SUST); -} - void TranslatorVisitor::SYNC(u64) { ThrowNotImplemented(Opcode::SYNC); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp new file mode 100644 index 000000000..9a2d16a6e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -0,0 +1,280 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/ir/modifiers.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Type : u64 { + _1D, + BUFFER_1D, + ARRAY_1D, + _2D, + ARRAY_2D, + _3D, +}; + +constexpr unsigned R = 1 << 0; +constexpr unsigned G = 1 << 1; +constexpr unsigned B = 1 << 2; +constexpr unsigned A = 1 << 3; + +constexpr std::array MASK{ + 0U, // + R, // + G, // + R | G, // + B, // + R | B, // + G | B, // + R | G | B, // + A, // + R | A, // + G | A, // + R | G | A, // + B | A, // + R | B | A, // + G | B | A, // + R | G | B | A, // +}; + +enum class Size : u64 { + U8, + S8, + U16, + S16, + B32, + B64, + B128, +}; + +enum class Clamp : u64 { + IGN, + Default, + TRAP, +}; + +enum class LoadCache : u64 { + Default, + CG, + CI, + CV, +}; + +enum class StoreCache : u64 { + Default, + CG, + CS, + WT, +}; + +ImageFormat Format(Size size) { + switch (size) { + case Size::U8: + return ImageFormat::R8_UINT; + case Size::S8: + return ImageFormat::R8_SINT; + case Size::U16: + return ImageFormat::R16_UINT; + case Size::S16: + return ImageFormat::R16_SINT; + case Size::B32: + return ImageFormat::R32_UINT; + case Size::B64: + return ImageFormat::R32G32_UINT; + case Size::B128: + return ImageFormat::R32G32B32A32_UINT; + } + throw NotImplementedException("Invalid size {}", size); +} + +int SizeInRegs(Size size) { + switch (size) { + case Size::U8: + case Size::S8: + case Size::U16: + case Size::S16: + case Size::B32: + return 1; + case Size::B64: + return 2; + case Size::B128: + return 4; + } + throw NotImplementedException("Invalid size {}", size); +} + +TextureType GetType(Type type) { + switch (type) { + case Type::_1D: + return TextureType::Color1D; + case Type::BUFFER_1D: + return TextureType::Buffer; + case Type::ARRAY_1D: + return TextureType::ColorArray1D; + case Type::_2D: + return TextureType::Color2D; + case Type::ARRAY_2D: + return TextureType::ColorArray2D; + case Type::_3D: + return TextureType::Color3D; + } + throw NotImplementedException("Invalid type {}", type); +} + +IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { + const auto array{[&](int index) { + return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); + }}; + switch (type) { + case Type::_1D: + case Type::BUFFER_1D: + return v.X(reg); + case Type::ARRAY_1D: + return v.ir.CompositeConstruct(v.X(reg), array(1)); + case Type::_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); + case Type::ARRAY_2D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); + case Type::_3D: + return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 3)); + } + throw NotImplementedException("Invalid type {}", type); +} + +unsigned SwizzleMask(u64 swizzle) { + if (swizzle == 0 || swizzle >= MASK.size()) { + throw NotImplementedException("Invalid swizzle {}", swizzle); + } + return MASK[swizzle]; +} + +IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { + std::array colors; + for (int i = 0; i < num_regs; ++i) { + colors[i] = ir.GetReg(reg + i); + } + for (int i = num_regs; i < 4; ++i) { + colors[i] = ir.Imm32(0); + } + return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); +} +} // Anonymous namespace + +void TranslatorVisitor::SULD(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, LoadCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const suld{insn}; + + if (suld.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", suld.clamp.Value()); + } + if (suld.cache != LoadCache::Default) { + throw NotImplementedException("Cache {}", suld.cache.Value()); + } + const bool is_typed{suld.d != 0}; + if (is_typed && suld.ba != 0) { + throw NotImplementedException("BA"); + } + + const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless}; + const TextureType type{GetType(suld.type)}; + const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)}; + const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast(suld.bound_offset * 4)) + : X(suld.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + const IR::Value result{ir.ImageRead(handle, coords, info)}; + IR::Reg dest_reg{suld.dest_reg}; + if (is_typed) { + const int num_regs{SizeInRegs(suld.size)}; + for (int i = 0; i < num_regs; ++i) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + } + } else { + const unsigned mask{SwizzleMask(suld.swizzle)}; + const int bits{std::popcount(mask)}; + if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : bits)) { + throw NotImplementedException("Unaligned destination register"); + } + for (unsigned component = 0; component < 4; ++component) { + if (((mask >> component) & 1) == 0) { + continue; + } + X(dest_reg, IR::U32{ir.CompositeExtract(result, component)}); + ++dest_reg; + } + } +} + +void TranslatorVisitor::SUST(u64 insn) { + union { + u64 raw; + BitField<51, 1, u64> is_bound; + BitField<52, 1, u64> d; + BitField<23, 1, u64> ba; + BitField<33, 3, Type> type; + BitField<24, 2, StoreCache> cache; + BitField<20, 3, Size> size; // .D + BitField<20, 4, u64> swizzle; // .P + BitField<49, 2, Clamp> clamp; + BitField<0, 8, IR::Reg> data_reg; + BitField<8, 8, IR::Reg> coord_reg; + BitField<36, 13, u64> bound_offset; // is_bound + BitField<39, 8, IR::Reg> bindless_reg; // !is_bound + } const sust{insn}; + + if (sust.clamp != Clamp::IGN) { + throw NotImplementedException("Clamp {}", sust.clamp.Value()); + } + if (sust.cache != StoreCache::Default) { + throw NotImplementedException("Cache {}", sust.cache.Value()); + } + const bool is_typed{sust.d != 0}; + if (is_typed && sust.ba != 0) { + throw NotImplementedException("BA"); + } + const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless}; + const TextureType type{GetType(sust.type)}; + const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)}; + const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast(sust.bound_offset * 4)) + : X(sust.bindless_reg)}; + IR::TextureInstInfo info{}; + info.type.Assign(type); + info.image_format.Assign(format); + + IR::Value color; + if (is_typed) { + color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size)); + } else { + const unsigned mask{SwizzleMask(sust.swizzle)}; + if (mask != 0xf) { + throw NotImplementedException("Non-full mask"); + } + color = MakeColor(ir, sust.data_reg, 4); + } + ir.ImageWrite(handle, coords, color, info); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 95d416586..9671d115e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp @@ -33,24 +33,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -169,7 +169,8 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, dref = v.F(meta_reg++); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tex.type, tex.dc != 0)); + info.type.Assign(GetType(tex.type)); + info.is_depth.Assign(tex.dc != 0 ? 1 : 0); info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); info.has_lod_clamp.Assign(lc ? 1 : 0); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp index fe2c7db85..3500a4559 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp @@ -95,18 +95,21 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { {}, info); case 4: // 2D.DC CheckAlignment(reg_a, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, {}, {}, info); case 5: // 2D.LL.DC CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b + 1), v.F(reg_b), {}, {}, info); case 6: // 2D.LZ.DC CheckAlignment(reg_a, 2); - info.type.Assign(TextureType::Shadow2D); + info.type.Assign(TextureType::Color2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), zero, {}, {}, info); case 7: // ARRAY_2D @@ -124,7 +127,8 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { case 9: // ARRAY_2D.LZ.DC CheckAlignment(reg_a, 2); CheckAlignment(reg_b, 2); - info.type.Assign(TextureType::ShadowArray2D); + info.type.Assign(TextureType::ColorArray2D); + info.is_depth.Assign(1); return v.ir.ImageSampleDrefExplicitLod( handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), v.F(reg_b + 1), zero, {}, {}, info); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index b2f9cda46..218cbc1a8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp @@ -37,24 +37,24 @@ enum class ComponentType : u64 { A = 3, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -163,7 +163,8 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy dref = v.F(meta_reg++); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tld4.type, tld4.dc != 0)); + info.type.Assign(GetType(tld4.type)); + info.is_depth.Assign(tld4.dc != 0 ? 1 : 0); info.gather_component.Assign(static_cast(component_type)); const IR::Value sample{[&] { if (tld4.dc == 0) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp index 2ba9c1018..34efa2d50 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp @@ -59,7 +59,8 @@ IR::Value Sample(TranslatorVisitor& v, u64 insn) { info.relaxed_precision.Assign(1); } info.gather_component.Assign(static_cast(tld4s.component_type.Value())); - info.type.Assign(tld4s.dc != 0 ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D); + info.type.Assign(Shader::TextureType::Color2D); + info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0); IR::Value coords; if (tld4s.aoffi != 0) { CheckAlignment(reg_a, 2); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp index c66468a48..c3fe3ffda 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -152,7 +152,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { } IR::TextureInstInfo info{}; - info.type.Assign(GetType(txd.type, false)); + info.type.Assign(GetType(txd.type)); info.num_derivates.Assign(num_derivates); info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp index 987b7ec34..983058303 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -137,7 +137,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { throw NotImplementedException("TLD.CL - CLAMP is not implmented"); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tld.type, false)); + info.type.Assign(GetType(tld.type)); const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; IR::Reg dest_reg{tld.dest_reg}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp index 0863bdfcd..5dd7e31b2 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp @@ -2,7 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include #include "common/bit_field.h" #include "common/common_types.h" diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp index b6efc04f0..2277d24ff 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp @@ -23,24 +23,24 @@ enum class TextureType : u64 { ARRAY_CUBE, }; -Shader::TextureType GetType(TextureType type, bool dc) { +Shader::TextureType GetType(TextureType type) { switch (type) { case TextureType::_1D: - return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; + return Shader::TextureType::Color1D; case TextureType::ARRAY_1D: - return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; + return Shader::TextureType::ColorArray1D; case TextureType::_2D: - return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; + return Shader::TextureType::Color2D; case TextureType::ARRAY_2D: - return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; + return Shader::TextureType::ColorArray2D; case TextureType::_3D: - return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; + return Shader::TextureType::Color3D; case TextureType::ARRAY_3D: throw NotImplementedException("3D array texture type"); case TextureType::CUBE: - return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; + return Shader::TextureType::ColorCube; case TextureType::ARRAY_CUBE: - return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; + return Shader::TextureType::ColorArrayCube; } throw NotImplementedException("Invalid texture type {}", type); } @@ -97,7 +97,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { handle = v.ir.Imm32(static_cast(tmml.cbuf_offset.Value() * 4)); } IR::TextureInstInfo info{}; - info.type.Assign(GetType(tmml.type, false)); + info.type.Assign(GetType(tmml.type)); const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; IR::Reg dest_reg{tmml.dest_reg}; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index edbfcd308..bc23b0211 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -416,8 +416,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ImageQueryLod: case IR::Opcode::ImageGradient: { const TextureType type{inst.Flags().type}; - info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || - type == TextureType::Shadow1D || type == TextureType::ShadowArray1D; + info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; info.uses_sparse_residency |= inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index c8aee3d3d..a7b1fcfad 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -61,6 +61,12 @@ IR::Opcode IndexedInstruction(const IR::Inst& inst) { case IR::Opcode::BoundImageGradient: case IR::Opcode::BindlessImageGradient: return IR::Opcode::ImageGradient; + case IR::Opcode::BoundImageRead: + case IR::Opcode::BindlessImageRead: + return IR::Opcode::ImageRead; + case IR::Opcode::BoundImageWrite: + case IR::Opcode::BindlessImageWrite: + return IR::Opcode::ImageWrite; default: return IR::Opcode::Void; } @@ -78,6 +84,8 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BindlessImageQueryDimensions: case IR::Opcode::BindlessImageQueryLod: case IR::Opcode::BindlessImageGradient: + case IR::Opcode::BindlessImageRead: + case IR::Opcode::BindlessImageWrite: return true; case IR::Opcode::BoundImageSampleImplicitLod: case IR::Opcode::BoundImageSampleExplicitLod: @@ -89,6 +97,8 @@ bool IsBindless(const IR::Inst& inst) { case IR::Opcode::BoundImageQueryDimensions: case IR::Opcode::BoundImageQueryLod: case IR::Opcode::BoundImageGradient: + case IR::Opcode::BoundImageRead: + case IR::Opcode::BoundImageWrite: return false; default: throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); @@ -147,10 +157,18 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { class Descriptors { public: - explicit Descriptors(TextureDescriptors& texture_descriptors_, - TextureBufferDescriptors& texture_buffer_descriptors_) - : texture_descriptors{texture_descriptors_}, texture_buffer_descriptors{ - texture_buffer_descriptors_} {} + explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, + TextureDescriptors& texture_descriptors_, + ImageDescriptors& image_descriptors_) + : texture_buffer_descriptors{texture_buffer_descriptors_}, + texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {} + + u32 Add(const TextureBufferDescriptor& desc) { + return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset; + }); + } u32 Add(const TextureDescriptor& desc) { return Add(texture_descriptors, desc, [&desc](const auto& existing) { @@ -159,11 +177,14 @@ public: }); } - u32 Add(const TextureBufferDescriptor& desc) { - return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && + u32 Add(const ImageDescriptor& desc) { + const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { + return desc.type == existing.type && desc.format == existing.format && + desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset; - }); + })}; + image_descriptors[index].is_written |= desc.is_written; + return index; } private: @@ -178,8 +199,9 @@ private: return static_cast(descriptors.size()) - 1; } - TextureDescriptors& texture_descriptors; TextureBufferDescriptors& texture_buffer_descriptors; + TextureDescriptors& texture_descriptors; + ImageDescriptors& image_descriptors; }; } // Anonymous namespace @@ -201,8 +223,9 @@ void TexturePass(Environment& env, IR::Program& program) { return lhs.cbuf.index < rhs.cbuf.index; }); Descriptors descriptors{ - program.info.texture_descriptors, program.info.texture_buffer_descriptors, + program.info.texture_descriptors, + program.info.image_descriptors, }; for (TextureInst& texture_inst : to_replace) { // TODO: Handle arrays @@ -233,19 +256,41 @@ void TexturePass(Environment& env, IR::Program& program) { break; } u32 index; - if (flags.type == TextureType::Buffer) { - index = descriptors.Add(TextureBufferDescriptor{ - .cbuf_index = cbuf.index, - .cbuf_offset = cbuf.offset, - .count = 1, - }); - } else { - index = descriptors.Add(TextureDescriptor{ - .type = flags.type, - .cbuf_index = cbuf.index, - .cbuf_offset = cbuf.offset, - .count = 1, - }); + switch (inst->GetOpcode()) { + case IR::Opcode::ImageRead: + case IR::Opcode::ImageWrite: { + const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; + if (flags.type == TextureType::Buffer) { + throw NotImplementedException("Image buffer"); + } else { + index = descriptors.Add(ImageDescriptor{ + .type = flags.type, + .format = flags.image_format, + .is_written = is_written, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); + } + break; + } + default: + if (flags.type == TextureType::Buffer) { + index = descriptors.Add(TextureBufferDescriptor{ + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); + } else { + index = descriptors.Add(TextureDescriptor{ + .type = flags.type, + .is_depth = flags.is_depth != 0, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); + } + break; } inst->SetArg(0, IR::Value{index}); } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 4cc731198..253b6eacf 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -22,15 +22,20 @@ enum class TextureType : u32 { Color3D, ColorCube, ColorArrayCube, - Shadow1D, - ShadowArray1D, - Shadow2D, - ShadowArray2D, - Shadow3D, - ShadowCube, - ShadowArrayCube, Buffer, }; +constexpr u32 NUM_TEXTURE_TYPES = 8; + +enum class ImageFormat : u32 { + Typeless, + R8_UINT, + R8_SINT, + R16_UINT, + R16_SINT, + R32_UINT, + R32G32_UINT, + R32G32B32A32_UINT, +}; enum class Interpolation { Smooth, @@ -43,32 +48,43 @@ struct InputVarying { bool used{false}; }; -struct TextureDescriptor { - TextureType type; +struct ConstantBufferDescriptor { + u32 index; + u32 count; +}; + +struct StorageBufferDescriptor { u32 cbuf_index; u32 cbuf_offset; u32 count; + bool is_written; }; -using TextureDescriptors = boost::container::small_vector; struct TextureBufferDescriptor { u32 cbuf_index; u32 cbuf_offset; u32 count; }; -using TextureBufferDescriptors = boost::container::small_vector; +using TextureBufferDescriptors = boost::container::small_vector; -struct ConstantBufferDescriptor { - u32 index; +struct TextureDescriptor { + TextureType type; + bool is_depth; + u32 cbuf_index; + u32 cbuf_offset; u32 count; }; +using TextureDescriptors = boost::container::small_vector; -struct StorageBufferDescriptor { +struct ImageDescriptor { + TextureType type; + ImageFormat format; + bool is_written; u32 cbuf_index; u32 cbuf_offset; u32 count; - bool is_written; }; +using ImageDescriptors = boost::container::small_vector; struct Info { static constexpr size_t MAX_CBUFS{18}; @@ -121,6 +137,7 @@ struct Info { boost::container::static_vector storage_buffers_descriptors; TextureBufferDescriptors texture_buffer_descriptors; TextureDescriptors texture_descriptors; + ImageDescriptors image_descriptors; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 6c0d5c7f4..39fe9289b 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -361,7 +361,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV .operation = operation, }; const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplacePipeline(key); const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); @@ -435,7 +435,7 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; - const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); + const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); const VkExtent2D extent{ diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index cff93cc60..d2c3f11c1 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -97,6 +97,9 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } + for (const auto& desc : info.image_descriptors) { + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage); + } } private: @@ -127,36 +130,6 @@ private: size_t offset{}; }; -inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { - switch (type) { - case Shader::TextureType::Color1D: - case Shader::TextureType::Shadow1D: - return VideoCommon::ImageViewType::e1D; - case Shader::TextureType::ColorArray1D: - case Shader::TextureType::ShadowArray1D: - return VideoCommon::ImageViewType::e1DArray; - case Shader::TextureType::Color2D: - case Shader::TextureType::Shadow2D: - return VideoCommon::ImageViewType::e2D; - case Shader::TextureType::ColorArray2D: - case Shader::TextureType::ShadowArray2D: - return VideoCommon::ImageViewType::e2DArray; - case Shader::TextureType::Color3D: - case Shader::TextureType::Shadow3D: - return VideoCommon::ImageViewType::e3D; - case Shader::TextureType::ColorCube: - case Shader::TextureType::ShadowCube: - return VideoCommon::ImageViewType::Cube; - case Shader::TextureType::ColorArrayCube: - case Shader::TextureType::ShadowArrayCube: - return VideoCommon::ImageViewType::CubeArray; - case Shader::TextureType::Buffer: - break; - } - UNREACHABLE_MSG("Invalid texture type {}", type); - return {}; -} - inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { @@ -164,9 +137,17 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& sam for (const auto& desc : info.texture_descriptors) { const VkSampler sampler{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; + const VkImageView vk_image_view{image_view.Handle(desc.type)}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); } + for (const auto& desc : info.image_descriptors) { + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; + update_descriptor_queue.AddImage(vk_image_view); + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ac47b1f3c..3d690f335 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -108,6 +108,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.image_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 57e2d569c..23c01f24e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -186,6 +186,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + for (const auto& desc : info.image_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0292a1b94..2ba44330f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -494,7 +494,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, if (!image_view) { return false; } - screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); + screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); screen_info.width = image_view->size.width; screen_info.height = image_view->size.height; screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 70328680d..8e029bcb3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -215,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return VK_COMPONENT_SWIZZLE_ZERO; } +[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) { + switch (type) { + case Shader::TextureType::Color1D: + return VK_IMAGE_VIEW_TYPE_1D; + case Shader::TextureType::Color2D: + return VK_IMAGE_VIEW_TYPE_2D; + case Shader::TextureType::ColorCube: + return VK_IMAGE_VIEW_TYPE_CUBE; + case Shader::TextureType::Color3D: + return VK_IMAGE_VIEW_TYPE_3D; + case Shader::TextureType::ColorArray1D: + return VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case Shader::TextureType::ColorArray2D: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case Shader::TextureType::ColorArrayCube: + return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; + case Shader::TextureType::Buffer: + UNREACHABLE_MSG("Texture buffers can't be image views"); + return VK_IMAGE_VIEW_TYPE_1D; + } + UNREACHABLE_MSG("Invalid image view type={}", type); + return VK_IMAGE_VIEW_TYPE_2D; +} + [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { switch (type) { case VideoCommon::ImageViewType::e1D: @@ -232,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { case VideoCommon::ImageViewType::CubeArray: return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; case VideoCommon::ImageViewType::Rect: - LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); + UNIMPLEMENTED_MSG("Rect image view"); return VK_IMAGE_VIEW_TYPE_2D; case VideoCommon::ImageViewType::Buffer: UNREACHABLE_MSG("Texture buffers can't be image views"); @@ -539,6 +563,28 @@ struct RangedBarrierRange { } }; +[[nodiscard]] VkFormat Format(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return VK_FORMAT_R8_SINT; + case Shader::ImageFormat::R8_UINT: + return VK_FORMAT_R8_UINT; + case Shader::ImageFormat::R16_UINT: + return VK_FORMAT_R16_UINT; + case Shader::ImageFormat::R16_SINT: + return VK_FORMAT_R16_SINT; + case Shader::ImageFormat::R32_UINT: + return VK_FORMAT_R32_UINT; + case Shader::ImageFormat::R32G32_UINT: + return VK_FORMAT_R32G32_UINT; + case Shader::ImageFormat::R32G32B32A32_UINT: + return VK_FORMAT_R32G32B32A32_UINT; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return VK_FORMAT_R32_UINT; +} } // Anonymous namespace void TextureCacheRuntime::Finish() { @@ -577,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst return; } } - ASSERT(src.ImageFormat() == dst.ImageFormat()); + ASSERT(src.format == dst.format); ASSERT(!(is_dst_msaa && !is_src_msaa)); ASSERT(operation == Fermi2D::Operation::SrcCopy); @@ -915,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span swizzle{ SwizzleSource::R, @@ -954,39 +1001,39 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }, .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), }; - const auto create = [&](VideoCommon::ImageViewType view_type, std::optional num_layers) { + const auto create = [&](TextureType tex_type, std::optional num_layers) { VkImageViewCreateInfo ci{create_info}; - ci.viewType = ImageViewType(view_type); + ci.viewType = ImageViewType(tex_type); if (num_layers) { ci.subresourceRange.layerCount = *num_layers; } vk::ImageView handle = device->GetLogical().CreateImageView(ci); if (device->HasDebuggingToolAttached()) { - handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); + handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } - image_views[static_cast(view_type)] = std::move(handle); + image_views[static_cast(tex_type)] = std::move(handle); }; switch (info.type) { case VideoCommon::ImageViewType::e1D: case VideoCommon::ImageViewType::e1DArray: - create(VideoCommon::ImageViewType::e1D, 1); - create(VideoCommon::ImageViewType::e1DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e1DArray); + create(TextureType::Color1D, 1); + create(TextureType::ColorArray1D, std::nullopt); + render_target = Handle(TextureType::ColorArray1D); break; case VideoCommon::ImageViewType::e2D: case VideoCommon::ImageViewType::e2DArray: - create(VideoCommon::ImageViewType::e2D, 1); - create(VideoCommon::ImageViewType::e2DArray, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e2DArray); + create(TextureType::Color2D, 1); + create(TextureType::ColorArray2D, std::nullopt); + render_target = Handle(Shader::TextureType::ColorArray2D); break; case VideoCommon::ImageViewType::e3D: - create(VideoCommon::ImageViewType::e3D, std::nullopt); - render_target = Handle(VideoCommon::ImageViewType::e3D); + create(TextureType::Color3D, std::nullopt); + render_target = Handle(Shader::TextureType::Color3D); break; case VideoCommon::ImageViewType::Cube: case VideoCommon::ImageViewType::CubeArray: - create(VideoCommon::ImageViewType::Cube, 6); - create(VideoCommon::ImageViewType::CubeArray, std::nullopt); + create(TextureType::ColorCube, 6); + create(TextureType::ColorArrayCube, std::nullopt); break; case VideoCommon::ImageViewType::Rect: UNIMPLEMENTED(); @@ -1009,7 +1056,8 @@ VkImageView ImageView::DepthView() { if (depth_view) { return *depth_view; } - depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT); return *depth_view; } @@ -1017,18 +1065,38 @@ VkImageView ImageView::StencilView() { if (stencil_view) { return *stencil_view; } - stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); + const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); + stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT); return *stencil_view; } -vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { +VkImageView ImageView::StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique(); + } + auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds}; + auto& view{views[static_cast(texture_type)]}; + if (view) { + return *view; + } + view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); + return *view; +} + +vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { return device->GetLogical().CreateImageView({ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = nullptr, .flags = 0, .image = image_handle, .viewType = ImageViewType(type), - .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format, + .format = vk_format, .components{ .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 498e76a1c..0b73d55f8 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -7,6 +7,7 @@ #include #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/texture_cache/texture_cache.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" @@ -133,8 +134,11 @@ public: [[nodiscard]] VkImageView StencilView(); - [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { - return *image_views[static_cast(query_type)]; + [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); + + [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { + return *image_views[static_cast(texture_type)]; } [[nodiscard]] VkImage ImageHandle() const noexcept { @@ -145,10 +149,6 @@ public: return render_target; } - [[nodiscard]] PixelFormat ImageFormat() const noexcept { - return image_format; - } - [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { return samples; } @@ -162,15 +162,20 @@ public: } private: - [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); + struct StorageViews { + std::array signeds; + std::array unsigneds; + }; + + [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask); const Device* device = nullptr; - std::array image_views; + std::array image_views; + std::unique_ptr storage_views; vk::ImageView depth_view; vk::ImageView stencil_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; - PixelFormat image_format = PixelFormat::Invalid; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; GPUVAddr gpu_addr = 0; u32 buffer_size = 0; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5e8d99482..255b07cf8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -117,6 +117,9 @@ public: /// Return a reference to the given image view id [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + /// Mark an image as modified from the GPU + void MarkModification(ImageId id) noexcept; + /// Fill image_view_ids with the graphics images in indices void FillGraphicsImageViews(std::span indices, std::span image_view_ids); @@ -526,6 +529,11 @@ typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { return slot_image_views[id]; } +template +void TextureCache

::MarkModification(ImageId id) noexcept { + MarkModification(slot_images[id]); +} + template void TextureCache

::FillGraphicsImageViews(std::span indices, std::span image_view_ids) { -- cgit v1.2.3 From e5e79648cfa3ac9d30c00bccf4252cd0dc93bccc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Jul 2021 18:16:56 -0300 Subject: pipeline_helper: Add missing [[maybe_unused]] --- src/video_core/renderer_vulkan/pipeline_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index d2c3f11c1..a39459b2e 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -97,7 +97,7 @@ public: for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } - for (const auto& desc : info.image_descriptors) { + for ([[maybe_unused]] const auto& desc : info.image_descriptors) { Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage); } } -- cgit v1.2.3 From 1030b612a36f6b44e3b25215039748b01cfb9b8c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:12:56 -0300 Subject: vk_rasterizer: Request outside render pass execution context for compute --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2ba44330f..7df169c85 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -279,6 +279,7 @@ void RasterizerVulkan::DispatchCompute() { const auto& qmd{kepler_compute.launch_description}; const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; + scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); } -- cgit v1.2.3 From ab543f18213133b3076b81f30df386d5cb470e49 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:37:03 -0300 Subject: spirv: Guard against typeless image reads on unsupported devices --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 4 +++- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 4 ++++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 7 +++++++ src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 6 files changed, 17 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 63ed92a5d..db7b3f1b2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -238,11 +238,13 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::SubgroupVoteKHR); } } + if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { + ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); ctx.AddCapability(spv::Capability::ImageQuery); ctx.AddCapability(spv::Capability::SampledBuffer); - ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index dd261fd47..17266ce77 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -388,6 +388,10 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { const auto info{inst->Flags()}; + if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) { + // LOG_WARNING(..., "Typeless image read not supported by host"); + return ctx.ConstantNull(ctx.U32[4]); + } return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], Image(ctx, index, info), coords, std::nullopt, std::span{}); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 8c63c9876..9ef8688c9 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -421,6 +421,13 @@ void VisitUsages(Info& info, IR::Inst& inst) { inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; break; } + case IR::Opcode::ImageRead: { + const auto flags{inst.Flags()}; + info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless; + info.uses_sparse_residency |= + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; + break; + } case IR::Opcode::SubgroupEqMask: case IR::Opcode::SubgroupLtMask: case IR::Opcode::SubgroupLeMask: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index e13cb948a..f0d68d516 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -36,6 +36,7 @@ struct Profile { bool support_explicit_workgroup_layout{}; bool support_vote{}; bool support_viewport_index_layer_non_geometry{}; + bool support_typeless_image_loads{}; bool warp_size_potentially_larger_than_guest{}; // FClamp is broken and OpFMax + OpFMin should be used instead diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 253b6eacf..3fbe99268 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -127,6 +127,7 @@ struct Info { bool uses_subgroup_vote{}; bool uses_subgroup_mask{}; bool uses_fswzadd{}; + bool uses_typeless_image_reads{}; IR::Type used_constant_buffer_types{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index fcebb8f6e..25dbefd5c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -635,6 +635,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, -- cgit v1.2.3 From 479ca00071ccaab6ca9ac28daf375e1ed15dc447 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 02:50:30 -0300 Subject: nsight_aftermath_tracker: Report used shaders to Nsight Aftermath --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 4 +++- .../vulkan_common/nsight_aftermath_tracker.cpp | 5 ++--- .../vulkan_common/nsight_aftermath_tracker.h | 21 +++++++++++---------- src/video_core/vulkan_common/vulkan_device.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 3 ++- 6 files changed, 20 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 2cfe9d4bd..ec9866605 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,6 +206,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); + device.SaveShader(code); pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 25dbefd5c..f699a9bdf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -770,6 +770,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const Shader::Profile profile{MakeProfile(key, program.stage)}; const std::vector code{EmitSPIRV(profile, program, binding)}; + device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], @@ -846,7 +847,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; u32 binding{0}; - std::vector code{EmitSPIRV(base_profile, program, binding)}; + const std::vector code{EmitSPIRV(base_profile, program, binding)}; + device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 758c038ba..209cb1e0a 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -73,12 +73,11 @@ NsightAftermathTracker::~NsightAftermathTracker() { } } -void NsightAftermathTracker::SaveShader(const std::vector& spirv) const { +void NsightAftermathTracker::SaveShader(std::span spirv) const { if (!initialized) { return; } - - std::vector spirv_copy = spirv; + std::vector spirv_copy(spirv.begin(), spirv.end()); GFSDK_Aftermath_SpirvCode shader; shader.pData = spirv_copy.data(); shader.size = static_cast(spirv_copy.size() * 4); diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.h b/src/video_core/vulkan_common/nsight_aftermath_tracker.h index 4fe2b14d9..eae1891dd 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.h +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.h @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -33,7 +34,7 @@ public: NsightAftermathTracker(NsightAftermathTracker&&) = delete; NsightAftermathTracker& operator=(NsightAftermathTracker&&) = delete; - void SaveShader(const std::vector& spirv) const; + void SaveShader(std::span spirv) const; private: #ifdef HAS_NSIGHT_AFTERMATH @@ -61,21 +62,21 @@ private: bool initialized = false; Common::DynamicLibrary dl; - PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps; - PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps; - PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier; - PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv; - PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder; - PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON; - PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON; + PFN_GFSDK_Aftermath_DisableGpuCrashDumps GFSDK_Aftermath_DisableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_EnableGpuCrashDumps GFSDK_Aftermath_EnableGpuCrashDumps{}; + PFN_GFSDK_Aftermath_GetShaderDebugInfoIdentifier GFSDK_Aftermath_GetShaderDebugInfoIdentifier{}; + PFN_GFSDK_Aftermath_GetShaderHashSpirv GFSDK_Aftermath_GetShaderHashSpirv{}; + PFN_GFSDK_Aftermath_GpuCrashDump_CreateDecoder GFSDK_Aftermath_GpuCrashDump_CreateDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_DestroyDecoder GFSDK_Aftermath_GpuCrashDump_DestroyDecoder{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GenerateJSON GFSDK_Aftermath_GpuCrashDump_GenerateJSON{}; + PFN_GFSDK_Aftermath_GpuCrashDump_GetJSON GFSDK_Aftermath_GpuCrashDump_GetJSON{}; #endif }; #ifndef HAS_NSIGHT_AFTERMATH inline NsightAftermathTracker::NsightAftermathTracker() = default; inline NsightAftermathTracker::~NsightAftermathTracker() = default; -inline void NsightAftermathTracker::SaveShader(const std::vector&) const {} +inline void NsightAftermathTracker::SaveShader(std::span) const {} #endif } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index c027598ba..78bb741bc 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -493,7 +493,7 @@ void Device::ReportLoss() const { std::this_thread::sleep_for(std::chrono::seconds{15}); } -void Device::SaveShader(const std::vector& spirv) const { +void Device::SaveShader(std::span spirv) const { if (nsight_aftermath_tracker) { nsight_aftermath_tracker->SaveShader(spirv); } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index ac2311e7e..adf62a707 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "common/common_types.h" @@ -43,7 +44,7 @@ public: void ReportLoss() const; /// Reports a shader to Nsight Aftermath. - void SaveShader(const std::vector& spirv) const; + void SaveShader(std::span spirv) const; /// Returns the name of the VkDriverId reported from Vulkan. std::string GetDriverName() const; -- cgit v1.2.3 From 3db2b3effa953ae66457b7a19b419fc4db2c4801 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Apr 2021 02:07:02 -0400 Subject: shader: Implement ATOM/S and RED --- src/shader_recompiler/CMakeLists.txt | 3 + .../backend/spirv/emit_context.cpp | 158 +++++- src/shader_recompiler/backend/spirv/emit_context.h | 20 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 3 + src/shader_recompiler/backend/spirv/emit_spirv.h | 95 ++++ .../backend/spirv/emit_spirv_atomic.cpp | 528 +++++++++++++++++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 200 +++++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 39 ++ .../frontend/ir/microinstruction.cpp | 66 +++ src/shader_recompiler/frontend/ir/opcodes.inc | 70 +++ .../impl/atomic_operations_global_memory.cpp | 222 +++++++++ .../impl/atomic_operations_shared_memory.cpp | 110 +++++ .../maxwell/translate/impl/not_implemented.cpp | 12 - .../ir_opt/collect_shader_info_pass.cpp | 70 +++ .../global_memory_to_storage_buffer_pass.cpp | 121 ++++- .../ir_opt/lower_fp16_to_fp32.cpp | 12 + src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 13 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 14 + src/video_core/vulkan_common/vulkan_device.h | 6 + 21 files changed, 1745 insertions(+), 19 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 8e1d37373..7b9f08aa0 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -3,6 +3,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp backend/spirv/emit_spirv.h + backend/spirv/emit_spirv_atomic.cpp backend/spirv/emit_spirv_barriers.cpp backend/spirv/emit_spirv_bitwise_conversion.cpp backend/spirv/emit_spirv_composite.cpp @@ -65,6 +66,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/program.h frontend/maxwell/structured_control_flow.cpp frontend/maxwell/structured_control_flow.h + frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp + frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp frontend/maxwell/translate/impl/barrier_operations.cpp frontend/maxwell/translate/impl/bitfield_extract.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 32f8c4508..e5d83e9b4 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,6 +15,53 @@ namespace Shader::Backend::SPIRV { namespace { +enum class CasFunctionType { + Increment, + Decrement, + FPAdd, + FPMin, + FPMax, +}; + +Id CasFunction(EmitContext& ctx, CasFunctionType function_type, Id value_type) { + const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id op_a{ctx.OpFunctionParameter(value_type)}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + ctx.AddLabel(); + Id result{}; + switch (function_type) { + case CasFunctionType::Increment: { + const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; + const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); + break; + } + case CasFunctionType::Decrement: { + const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; + const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; + const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; + const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, op_b, decr); + break; + } + case CasFunctionType::FPAdd: + result = ctx.OpFAdd(value_type, op_a, op_b); + break; + case CasFunctionType::FPMin: + result = ctx.OpFMin(value_type, op_a, op_b); + break; + case CasFunctionType::FPMax: + result = ctx.OpFMax(value_type, op_a, op_b); + break; + default: + break; + } + ctx.OpReturnValue(result); + ctx.OpFunctionEnd(); + return func; +} + Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; const Id type{ctx.F32[1]}; @@ -196,6 +243,56 @@ Id EmitContext::Def(const IR::Value& value) { } } +Id EmitContext::CasLoop(Id function, CasPointerType pointer_type, Id value_type) { + const Id loop_header{OpLabel()}; + const Id continue_block{OpLabel()}; + const Id merge_block{OpLabel()}; + const Id storage_type{pointer_type == CasPointerType::Shared ? shared_memory_u32_type + : storage_memory_u32}; + const Id func_type{TypeFunction(value_type, U32[1], value_type, storage_type)}; + const Id func{OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id index{OpFunctionParameter(U32[1])}; + const Id op_b{OpFunctionParameter(value_type)}; + const Id base{OpFunctionParameter(storage_type)}; + AddLabel(); + const Id one{Constant(U32[1], 1)}; + OpBranch(loop_header); + AddLabel(loop_header); + OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + OpBranch(continue_block); + + AddLabel(continue_block); + const Id word_pointer{pointer_type == CasPointerType::Shared + ? OpAccessChain(shared_u32, base, index) + : OpAccessChain(storage_u32, base, u32_zero_value, index)}; + if (value_type.value == F32[2].value) { + const Id u32_value{OpLoad(U32[1], word_pointer)}; + const Id value{OpUnpackHalf2x16(F32[2], u32_value)}; + const Id new_value{OpFunctionCall(value_type, function, value, op_b)}; + const Id u32_new_value{OpPackHalf2x16(U32[1], new_value)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, + u32_zero_value, u32_new_value, u32_value)}; + const Id success{OpIEqual(U1, atomic_res, u32_value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturnValue(OpUnpackHalf2x16(F32[2], atomic_res)); + } else { + const Id value{OpLoad(U32[1], word_pointer)}; + const Id new_value{OpBitcast( + U32[1], OpFunctionCall(value_type, function, OpBitcast(value_type, value), op_b))}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, + u32_zero_value, new_value, value)}; + const Id success{OpIEqual(U1, atomic_res, value)}; + OpBranchConditional(success, merge_block, loop_header); + + AddLabel(merge_block); + OpReturnValue(OpBitcast(value_type, atomic_res)); + } + OpFunctionEnd(); + return func; +} + void EmitContext::DefineCommonTypes(const Info& info) { void_id = TypeVoid(); @@ -300,9 +397,9 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { } const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; - const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)}; + shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); - shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup); + shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); interfaces.push_back(shared_memory_u32); const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; @@ -346,6 +443,14 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { if (program.info.uses_int16) { shared_store_u16_func = make_function(16, 16); } + if (program.info.uses_shared_increment) { + const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; + increment_cas_shared = CasLoop(inc_func, CasPointerType::Shared, U32[1]); + } + if (program.info.uses_shared_decrement) { + const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; + decrement_cas_shared = CasLoop(dec_func, CasPointerType::Shared, U32[1]); + } } void EmitContext::DefineAttributeMemAccess(const Info& info) { @@ -530,12 +635,12 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { MemberName(struct_type, 0, "data"); MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + storage_memory_u32 = TypePointer(spv::StorageClass::StorageBuffer, struct_type); storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); u32 index{}; for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { - const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; + const Id id{AddGlobalVariable(storage_memory_u32, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("ssbo{}", index)); @@ -546,6 +651,51 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { index += desc.count; binding += desc.count; } + if (info.uses_global_increment) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; + increment_cas_ssbo = CasLoop(inc_func, CasPointerType::Ssbo, U32[1]); + } + if (info.uses_global_decrement) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; + decrement_cas_ssbo = CasLoop(dec_func, CasPointerType::Ssbo, U32[1]); + } + if (info.uses_atomic_f32_add) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[1])}; + f32_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[1]); + } + if (info.uses_atomic_f16x2_add) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F16[2])}; + f16x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F16[2]); + } + if (info.uses_atomic_f16x2_min) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMin, F16[2])}; + f16x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + } + if (info.uses_atomic_f16x2_max) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMax, F16[2])}; + f16x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + } + if (info.uses_atomic_f32x2_add) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[2])}; + f32x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[2]); + } + if (info.uses_atomic_f32x2_min) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMin, F32[2])}; + f32x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + } + if (info.uses_atomic_f32x2_max) { + AddCapability(spv::Capability::VariablePointersStorageBuffer); + const Id func{CasFunction(*this, CasFunctionType::FPMax, F32[2])}; + f32x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + } } void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e70f3458c..34f38454f 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -94,6 +94,7 @@ public: Id output_f32{}; Id storage_u32{}; + Id storage_memory_u32{}; Id image_buffer_type{}; Id sampled_texture_buffer_type{}; @@ -136,9 +137,21 @@ public: Id shared_memory_u32{}; Id shared_memory_u32x2{}; Id shared_memory_u32x4{}; + Id shared_memory_u32_type{}; Id shared_store_u8_func{}; Id shared_store_u16_func{}; + Id increment_cas_shared{}; + Id increment_cas_ssbo{}; + Id decrement_cas_shared{}; + Id decrement_cas_ssbo{}; + Id f32_add_cas{}; + Id f16x2_add_cas{}; + Id f16x2_min_cas{}; + Id f16x2_max_cas{}; + Id f32x2_add_cas{}; + Id f32x2_min_cas{}; + Id f32x2_max_cas{}; Id input_position{}; std::array input_generics{}; @@ -153,6 +166,11 @@ public: std::vector interfaces; private: + enum class CasPointerType { + Shared, + Ssbo, + }; + void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineInterfaces(const Info& info); @@ -171,6 +189,8 @@ private: void DefineInputs(const Info& info); void DefineOutputs(const Info& info); + + [[nodiscard]] Id CasLoop(Id function, CasPointerType pointer_type, Id value_type); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 5a1ffd61c..9248bd78b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -238,6 +238,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::SubgroupVoteKHR); } } + if (info.uses_64_bit_atomics && profile.support_int64_atomics) { + ctx.AddCapability(spv::Capability::Int64Atomics); + } if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 12b7993ae..a3398a605 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -306,6 +306,101 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitGlobalAtomicIAdd32(EmitContext& ctx); +Id EmitGlobalAtomicSMin32(EmitContext& ctx); +Id EmitGlobalAtomicUMin32(EmitContext& ctx); +Id EmitGlobalAtomicSMax32(EmitContext& ctx); +Id EmitGlobalAtomicUMax32(EmitContext& ctx); +Id EmitGlobalAtomicInc32(EmitContext& ctx); +Id EmitGlobalAtomicDec32(EmitContext& ctx); +Id EmitGlobalAtomicAnd32(EmitContext& ctx); +Id EmitGlobalAtomicOr32(EmitContext& ctx); +Id EmitGlobalAtomicXor32(EmitContext& ctx); +Id EmitGlobalAtomicExchange32(EmitContext& ctx); +Id EmitGlobalAtomicIAdd64(EmitContext& ctx); +Id EmitGlobalAtomicSMin64(EmitContext& ctx); +Id EmitGlobalAtomicUMin64(EmitContext& ctx); +Id EmitGlobalAtomicSMax64(EmitContext& ctx); +Id EmitGlobalAtomicUMax64(EmitContext& ctx); +Id EmitGlobalAtomicInc64(EmitContext& ctx); +Id EmitGlobalAtomicDec64(EmitContext& ctx); +Id EmitGlobalAtomicAnd64(EmitContext& ctx); +Id EmitGlobalAtomicOr64(EmitContext& ctx); +Id EmitGlobalAtomicXor64(EmitContext& ctx); +Id EmitGlobalAtomicExchange64(EmitContext& ctx); +Id EmitGlobalAtomicAddF32(EmitContext& ctx); +Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); +Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp new file mode 100644 index 000000000..03d891419 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -0,0 +1,528 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { +namespace { + +Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shifted_value{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id index{ctx.OpIAdd(ctx.U32[1], shifted_value, ctx.Constant(ctx.U32[1], index_offset))}; + return ctx.profile.support_explicit_workgroup_layout + ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) + : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); +} + +Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { + if (offset.IsImmediate()) { + const u32 imm_offset{static_cast(offset.U32() / element_size)}; + return ctx.Constant(ctx.U32[1], imm_offset); + } + const u32 shift{static_cast(std::countr_zero(element_size))}; + const Id index{ctx.Def(offset)}; + if (shift == 0) { + return index; + } + const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); +} + +Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + u32 index_offset = 0) { + // TODO: Support reinterpreting bindings, guaranteed to be aligned + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], index_offset))}; + return ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index); +} + +std::pair GetAtomicArgs(EmitContext& ctx) { + const Id scope{ctx.Constant(ctx.U32[1], static_cast(spv::Scope::Device))}; + const Id semantics{ctx.u32_zero_value}; + return {scope, semantics}; +} + +Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) { + const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; + const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; + const Id original_composite{ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)}; + return ctx.OpBitcast(ctx.U64, original_composite); +} + +void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) { + const Id composite{ctx.OpBitcast(ctx.U32[2], result)}; + ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], composite, 0)); + ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], composite, 1)); +} +} // Anonymous namespace + +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value, + ctx.shared_memory_u32); +} + +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value, + ctx.shared_memory_u32); +} + +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer{GetSharedPointer(ctx, pointer_offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) { + const Id pointer_1{GetSharedPointer(ctx, pointer_offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetSharedPointer(ctx, pointer_offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + StoreResult(ctx, pointer_1, pointer_2, value); + return original_value; +} + +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); +} + +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer{GetStoragePointer(ctx, binding, offset)}; + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); +} + +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicIAdd(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpIAdd(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMin(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpSMin(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMin(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpUMin(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicSMax(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpSMax(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicUMax(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpUMax(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicAnd(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpBitwiseAnd(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicOr(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpBitwiseOr(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicXor(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + const Id result{ctx.OpBitwiseXor(ctx.U64, value, original_value)}; + StoreResult(ctx, pointer_1, pointer_2, result); + return original_value; +} + +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; + if (ctx.profile.support_int64_atomics) { + const auto [scope, semantics]{GetAtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); + } + // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; + const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; + StoreResult(ctx, pointer_1, pointer_2, value); + return original_value; +} + +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); +} + +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; + return ctx.OpBitcast(ctx.U32[1], result); +} + +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; + return ctx.OpPackHalf2x16(ctx.U32[1], result); +} + +Id EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 17be0c639..a3339f624 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -1284,6 +1284,204 @@ U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) return Inst(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); } +U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicIAdd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicSMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicUMin32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? SharedAtomicSMin(pointer_offset, value) + : SharedAtomicUMin(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicSMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicUMax32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) { + return is_signed ? SharedAtomicSMax(pointer_offset, value) + : SharedAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicDec32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicAnd32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicOr32, pointer_offset, value); +} + +U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) { + return Inst(Opcode::SharedAtomicXor32, pointer_offset, value); +} + +U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::SharedAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::SharedAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicIAdd32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicIAdd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicSMin32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicSMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicUMin32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicUMin64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? GlobalAtomicSMin(pointer_offset, value) + : GlobalAtomicUMin(pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicSMax32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicSMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicUMax32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicUMax64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) { + return is_signed ? GlobalAtomicSMax(pointer_offset, value) + : GlobalAtomicUMax(pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) { + return Inst(Opcode::GlobalAtomicInc32, pointer_offset, value); +} + +U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) { + return Inst(Opcode::GlobalAtomicDec32, pointer_offset, value); +} + +U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicAnd32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicAnd64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicOr32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicOr64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicXor32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicXor64, pointer_offset, value); + default: + ThrowInvalidType(value.Type()); + } +} + +U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) { + switch (value.Type()) { + case Type::U32: + return Inst(Opcode::GlobalAtomicExchange32, pointer_offset, value); + case Type::U64: + return Inst(Opcode::GlobalAtomicExchange64, pointer_offset, value); + default: + ThrowInvalidType(pointer_offset.Type()); + } +} + +F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value); +} + +Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control) { + return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value); +} + U1 IREmitter::LogicalOr(const U1& a, const U1& b) { return Inst(Opcode::LogicalOr, a, b); } @@ -1626,7 +1824,7 @@ Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInst } void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, - TextureInstInfo info) { + TextureInstInfo info) { const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; Inst(op, Flags{info}, handle, coords, color); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index ec60070ef..f9cbf1304 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -228,6 +228,45 @@ public: [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed); + [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value); + [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value); + + [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, + bool is_signed); + [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value); + [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value); + [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value); + + [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, + const FpControl control = {}); + [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 2df631791..0f66c5627 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -93,6 +93,72 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteSharedU32: case Opcode::WriteSharedU64: case Opcode::WriteSharedU128: + case Opcode::SharedAtomicIAdd32: + case Opcode::SharedAtomicSMin32: + case Opcode::SharedAtomicUMin32: + case Opcode::SharedAtomicSMax32: + case Opcode::SharedAtomicUMax32: + case Opcode::SharedAtomicInc32: + case Opcode::SharedAtomicDec32: + case Opcode::SharedAtomicAnd32: + case Opcode::SharedAtomicOr32: + case Opcode::SharedAtomicXor32: + case Opcode::SharedAtomicExchange32: + case Opcode::SharedAtomicExchange64: + case Opcode::GlobalAtomicIAdd32: + case Opcode::GlobalAtomicSMin32: + case Opcode::GlobalAtomicUMin32: + case Opcode::GlobalAtomicSMax32: + case Opcode::GlobalAtomicUMax32: + case Opcode::GlobalAtomicInc32: + case Opcode::GlobalAtomicDec32: + case Opcode::GlobalAtomicAnd32: + case Opcode::GlobalAtomicOr32: + case Opcode::GlobalAtomicXor32: + case Opcode::GlobalAtomicExchange32: + case Opcode::GlobalAtomicIAdd64: + case Opcode::GlobalAtomicSMin64: + case Opcode::GlobalAtomicUMin64: + case Opcode::GlobalAtomicSMax64: + case Opcode::GlobalAtomicUMax64: + case Opcode::GlobalAtomicAnd64: + case Opcode::GlobalAtomicOr64: + case Opcode::GlobalAtomicXor64: + case Opcode::GlobalAtomicExchange64: + case Opcode::GlobalAtomicAddF32: + case Opcode::GlobalAtomicAddF16x2: + case Opcode::GlobalAtomicAddF32x2: + case Opcode::GlobalAtomicMinF16x2: + case Opcode::GlobalAtomicMinF32x2: + case Opcode::GlobalAtomicMaxF16x2: + case Opcode::GlobalAtomicMaxF32x2: + case Opcode::StorageAtomicIAdd32: + case Opcode::StorageAtomicSMin32: + case Opcode::StorageAtomicUMin32: + case Opcode::StorageAtomicSMax32: + case Opcode::StorageAtomicUMax32: + case Opcode::StorageAtomicInc32: + case Opcode::StorageAtomicDec32: + case Opcode::StorageAtomicAnd32: + case Opcode::StorageAtomicOr32: + case Opcode::StorageAtomicXor32: + case Opcode::StorageAtomicExchange32: + case Opcode::StorageAtomicIAdd64: + case Opcode::StorageAtomicSMin64: + case Opcode::StorageAtomicUMin64: + case Opcode::StorageAtomicSMax64: + case Opcode::StorageAtomicUMax64: + case Opcode::StorageAtomicAnd64: + case Opcode::StorageAtomicOr64: + case Opcode::StorageAtomicXor64: + case Opcode::StorageAtomicExchange64: + case Opcode::StorageAtomicAddF32: + case Opcode::StorageAtomicAddF16x2: + case Opcode::StorageAtomicAddF32x2: + case Opcode::StorageAtomicMinF16x2: + case Opcode::StorageAtomicMinF32x2: + case Opcode::StorageAtomicMaxF16x2: + case Opcode::StorageAtomicMaxF32x2: case Opcode::BindlessImageWrite: case Opcode::BoundImageWrite: case Opcode::ImageWrite: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 86ea02560..dc776a73e 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -321,6 +321,76 @@ OPCODE(INotEqual, U1, U32, OPCODE(SGreaterThanEqual, U1, U32, U32, ) OPCODE(UGreaterThanEqual, U1, U32, U32, ) +// Atomic operations +OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) +OPCODE(SharedAtomicSMin32, U32, U32, U32, ) +OPCODE(SharedAtomicUMin32, U32, U32, U32, ) +OPCODE(SharedAtomicSMax32, U32, U32, U32, ) +OPCODE(SharedAtomicUMax32, U32, U32, U32, ) +OPCODE(SharedAtomicInc32, U32, U32, U32, ) +OPCODE(SharedAtomicDec32, U32, U32, U32, ) +OPCODE(SharedAtomicAnd32, U32, U32, U32, ) +OPCODE(SharedAtomicOr32, U32, U32, U32, ) +OPCODE(SharedAtomicXor32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange32, U32, U32, U32, ) +OPCODE(SharedAtomicExchange64, U64, U32, U64, ) + +OPCODE(GlobalAtomicIAdd32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMin32, U32, U64, U32, ) +OPCODE(GlobalAtomicSMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicUMax32, U32, U64, U32, ) +OPCODE(GlobalAtomicInc32, U32, U64, U32, ) +OPCODE(GlobalAtomicDec32, U32, U64, U32, ) +OPCODE(GlobalAtomicAnd32, U32, U64, U32, ) +OPCODE(GlobalAtomicOr32, U32, U64, U32, ) +OPCODE(GlobalAtomicXor32, U32, U64, U32, ) +OPCODE(GlobalAtomicExchange32, U32, U64, U32, ) +OPCODE(GlobalAtomicIAdd64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMin64, U64, U64, U64, ) +OPCODE(GlobalAtomicSMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicUMax64, U64, U64, U64, ) +OPCODE(GlobalAtomicAnd64, U64, U64, U64, ) +OPCODE(GlobalAtomicOr64, U64, U64, U64, ) +OPCODE(GlobalAtomicXor64, U64, U64, U64, ) +OPCODE(GlobalAtomicExchange64, U64, U64, U64, ) +OPCODE(GlobalAtomicAddF32, F32, U64, F32, ) +OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, ) +OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, ) +OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, ) + +OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicInc32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicDec32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicOr32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicXor32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, ) +OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicOr64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicXor64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, ) +OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, ) +OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, ) +OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, ) + // Logical operations OPCODE(LogicalOr, U1, U1, U1, ) OPCODE(LogicalAnd, U1, U1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..7a32c5eb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -0,0 +1,222 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, + SAFEADD, +}; + +enum class AtomSize : u64 { + U32, + S32, + U64, + F32, + F16x2, + S64, +}; + +IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, + AtomOp op, bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.GlobalAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.GlobalAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.GlobalAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.GlobalAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.GlobalAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.GlobalAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.GlobalAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.GlobalAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.GlobalAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atom Operation {}", op); + } +} + +IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, + AtomSize size) { + static constexpr IR::FpControl f16_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::DontCare}, + }; + static constexpr IR::FpControl f32_control{ + .no_contraction{false}, + .rounding{IR::FpRounding::RN}, + .fmz_mode{IR::FmzMode::FTZ}, + }; + switch (op) { + case AtomOp::ADD: + return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) + : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); + case AtomOp::MIN: + return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); + case AtomOp::MAX: + return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); + default: + throw NotImplementedException("FP Atom Operation {}", op); + } +} + +IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> addr_reg; + BitField<28, 20, s64> addr_offset; + BitField<28, 20, u64> rz_addr_offset; + BitField<48, 1, u64> e; + } const mem{insn}; + + const IR::U64 address{[&]() -> IR::U64 { + if (mem.e == 0) { + return v.ir.UConvert(64, v.X(mem.addr_reg)); + } + return v.L(mem.addr_reg); + }()}; + const u64 addr_offset{[&]() -> u64 { + if (mem.addr_reg == IR::Reg::RZ) { + // When RZ is used, the address is an absolute address + return static_cast(mem.rz_addr_offset.Value()); + } else { + return static_cast(mem.addr_offset.Value()); + } + }()}; + return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); +} + +bool AtomOpNotApplicable(AtomSize size, AtomOp op) { + // TODO: SAFEADD + switch (size) { + case AtomSize::S32: + case AtomSize::U64: + return (op == AtomOp::INC || op == AtomOp::DEC); + case AtomSize::S64: + return !(op == AtomOp::MIN || op == AtomOp::MAX); + case AtomSize::F32: + return op != AtomOp::ADD; + case AtomSize::F16x2: + return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); + default: + return false; + } +} + +IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F32: + case AtomSize::F16x2: + return ir.LoadGlobal32(offset); + case AtomSize::U64: + case AtomSize::S64: + return ir.PackUint2x32(ir.LoadGlobal64(offset)); + default: + throw NotImplementedException("Atom Size {}", size); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { + switch (size) { + case AtomSize::U32: + case AtomSize::S32: + case AtomSize::F16x2: + return v.X(dest_reg, IR::U32{result}); + case AtomSize::U64: + case AtomSize::S64: + return v.L(dest_reg, IR::U64{result}); + case AtomSize::F32: + return v.F(dest_reg, IR::F32{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOM(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<49, 3, AtomSize> size; + BitField<52, 4, AtomOp> op; + } const atom{insn}; + + const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64}; + const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64}; + const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2}; + const IR::U64 offset{AtomOffset(*this, insn)}; + IR::Value result; + + if (AtomOpNotApplicable(atom.size, atom.op)) { + result = LoadGlobal(ir, offset, atom.size); + } else if (!is_integer) { + if (atom.size == AtomSize::F32) { + result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size); + } else { + const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))}; + result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size); + } + } else if (size_64) { + result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed); + } else { + result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed); + } + StoreResult(*this, atom.dest_reg, result, atom.size); +} + +void TranslatorVisitor::RED(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> src_reg_b; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 3, AtomSize> size; + BitField<23, 3, AtomOp> op; + } const red{insn}; + + if (AtomOpNotApplicable(red.size, red.op)) { + return; + } + const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64}; + const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64}; + const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2}; + const IR::U64 offset{AtomOffset(*this, insn)}; + if (!is_integer) { + if (red.size == AtomSize::F32) { + ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size); + } else { + const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))}; + ApplyFpAtomOp(ir, offset, src_b, red.op, red.size); + } + } else if (size_64) { + ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed); + } else { + ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp @@ -0,0 +1,110 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class AtomOp : u64 { + ADD, + MIN, + MAX, + INC, + DEC, + AND, + OR, + XOR, + EXCH, +}; + +enum class AtomsSize : u64 { + U32, + S32, + U64, +}; + +IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, + bool is_signed) { + switch (op) { + case AtomOp::ADD: + return ir.SharedAtomicIAdd(offset, op_b); + case AtomOp::MIN: + return ir.SharedAtomicIMin(offset, op_b, is_signed); + case AtomOp::MAX: + return ir.SharedAtomicIMax(offset, op_b, is_signed); + case AtomOp::INC: + return ir.SharedAtomicInc(offset, op_b); + case AtomOp::DEC: + return ir.SharedAtomicDec(offset, op_b); + case AtomOp::AND: + return ir.SharedAtomicAnd(offset, op_b); + case AtomOp::OR: + return ir.SharedAtomicOr(offset, op_b); + case AtomOp::XOR: + return ir.SharedAtomicXor(offset, op_b); + case AtomOp::EXCH: + return ir.SharedAtomicExchange(offset, op_b); + default: + throw NotImplementedException("Integer Atoms Operation {}", op); + } +} + +IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> offset_reg; + BitField<30, 22, u64> absolute_offset; + BitField<30, 22, s64> relative_offset; + } const encoding{insn}; + + if (encoding.offset_reg == IR::Reg::RZ) { + return v.ir.Imm32(static_cast(encoding.absolute_offset << 2)); + } else { + const s32 relative{static_cast(encoding.relative_offset << 2)}; + return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); + } +} + +void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { + switch (size) { + case AtomsSize::U32: + case AtomsSize::S32: + return v.X(dest_reg, IR::U32{result}); + case AtomsSize::U64: + return v.L(dest_reg, IR::U64{result}); + default: + break; + } +} +} // Anonymous namespace + +void TranslatorVisitor::ATOMS(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, IR::Reg> src_reg_b; + BitField<28, 2, AtomsSize> size; + BitField<52, 4, AtomOp> op; + } const atoms{insn}; + + const bool size_64{atoms.size == AtomsSize::U64}; + if (size_64 && atoms.op != AtomOp::EXCH) { + throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); + } + const bool is_signed{atoms.size == AtomsSize::S32}; + const IR::U32 offset{AtomsOffset(*this, insn)}; + + IR::Value result; + if (size_64) { + result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); + } else { + result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); + } + StoreResult(*this, atoms.dest_reg, result, atoms.size); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 327941223..aebe3072a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) { ThrowNotImplemented(Opcode::ATOM_cas); } -void TranslatorVisitor::ATOM(u64) { - ThrowNotImplemented(Opcode::ATOM); -} - void TranslatorVisitor::ATOMS_cas(u64) { ThrowNotImplemented(Opcode::ATOMS_cas); } -void TranslatorVisitor::ATOMS(u64) { - ThrowNotImplemented(Opcode::ATOMS); -} - void TranslatorVisitor::B2R(u64) { ThrowNotImplemented(Opcode::B2R); } @@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) { ThrowNotImplemented(Opcode::RAM); } -void TranslatorVisitor::RED(u64) { - ThrowNotImplemented(Opcode::RED); -} - void TranslatorVisitor::RET(u64) { ThrowNotImplemented(Opcode::RET); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 9ef8688c9..73373576b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPOrdGreaterThanEqual16: case IR::Opcode::FPUnordGreaterThanEqual16: case IR::Opcode::FPIsNan16: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::StorageAtomicAddF16x2: + case IR::Opcode::StorageAtomicMinF16x2: + case IR::Opcode::StorageAtomicMaxF16x2: info.uses_fp16 = true; break; case IR::Opcode::CompositeConstructF64x2: @@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertF16U64: case IR::Opcode::ConvertF32U64: case IR::Opcode::ConvertF64U64: + case IR::Opcode::SharedAtomicExchange64: info.uses_int64 = true; break; default: @@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FSwizzleAdd: info.uses_fswzadd = true; break; + case IR::Opcode::SharedAtomicInc32: + info.uses_shared_increment = true; + break; + case IR::Opcode::SharedAtomicDec32: + info.uses_shared_decrement = true; + break; + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::StorageAtomicInc32: + info.uses_global_increment = true; + break; + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::StorageAtomicDec32: + info.uses_global_decrement = true; + break; + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::StorageAtomicAddF32: + info.uses_atomic_f32_add = true; + break; + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::StorageAtomicAddF16x2: + info.uses_atomic_f16x2_add = true; + break; + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::StorageAtomicAddF32x2: + info.uses_atomic_f32x2_add = true; + break; + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::StorageAtomicMinF16x2: + info.uses_atomic_f16x2_min = true; + break; + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::StorageAtomicMinF32x2: + info.uses_atomic_f32x2_min = true; + break; + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::StorageAtomicMaxF16x2: + info.uses_atomic_f16x2_max = true; + break; + case IR::Opcode::GlobalAtomicMaxF32x2: + case IR::Opcode::StorageAtomicMaxF32x2: + info.uses_atomic_f32x2_max = true; + break; + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::StorageAtomicIAdd64: + case IR::Opcode::StorageAtomicSMin64: + case IR::Opcode::StorageAtomicUMin64: + case IR::Opcode::StorageAtomicSMax64: + case IR::Opcode::StorageAtomicUMax64: + case IR::Opcode::StorageAtomicAnd64: + case IR::Opcode::StorageAtomicOr64: + case IR::Opcode::StorageAtomicXor64: + info.uses_64_bit_atomics = true; + break; + case IR::Opcode::SharedAtomicExchange64: + info.uses_64_bit_atomics = true; + info.uses_shared_memory_u32x2 = true; + break; default: break; } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index afe871505..0d4f266c3 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) { case IR::Opcode::WriteGlobal32: case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: return true; default: return false; @@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { return IR::Opcode::WriteStorage64; case IR::Opcode::WriteGlobal128: return IR::Opcode::WriteStorage128; + case IR::Opcode::GlobalAtomicIAdd32: + return IR::Opcode::StorageAtomicIAdd32; + case IR::Opcode::GlobalAtomicSMin32: + return IR::Opcode::StorageAtomicSMin32; + case IR::Opcode::GlobalAtomicUMin32: + return IR::Opcode::StorageAtomicUMin32; + case IR::Opcode::GlobalAtomicSMax32: + return IR::Opcode::StorageAtomicSMax32; + case IR::Opcode::GlobalAtomicUMax32: + return IR::Opcode::StorageAtomicUMax32; + case IR::Opcode::GlobalAtomicInc32: + return IR::Opcode::StorageAtomicInc32; + case IR::Opcode::GlobalAtomicDec32: + return IR::Opcode::StorageAtomicDec32; + case IR::Opcode::GlobalAtomicAnd32: + return IR::Opcode::StorageAtomicAnd32; + case IR::Opcode::GlobalAtomicOr32: + return IR::Opcode::StorageAtomicOr32; + case IR::Opcode::GlobalAtomicXor32: + return IR::Opcode::StorageAtomicXor32; + case IR::Opcode::GlobalAtomicIAdd64: + return IR::Opcode::StorageAtomicIAdd64; + case IR::Opcode::GlobalAtomicSMin64: + return IR::Opcode::StorageAtomicSMin64; + case IR::Opcode::GlobalAtomicUMin64: + return IR::Opcode::StorageAtomicUMin64; + case IR::Opcode::GlobalAtomicSMax64: + return IR::Opcode::StorageAtomicSMax64; + case IR::Opcode::GlobalAtomicUMax64: + return IR::Opcode::StorageAtomicUMax64; + case IR::Opcode::GlobalAtomicAnd64: + return IR::Opcode::StorageAtomicAnd64; + case IR::Opcode::GlobalAtomicOr64: + return IR::Opcode::StorageAtomicOr64; + case IR::Opcode::GlobalAtomicXor64: + return IR::Opcode::StorageAtomicXor64; + case IR::Opcode::GlobalAtomicExchange32: + return IR::Opcode::StorageAtomicExchange32; + case IR::Opcode::GlobalAtomicExchange64: + return IR::Opcode::StorageAtomicExchange64; + case IR::Opcode::GlobalAtomicAddF32: + return IR::Opcode::StorageAtomicAddF32; + case IR::Opcode::GlobalAtomicAddF16x2: + return IR::Opcode::StorageAtomicAddF16x2; + case IR::Opcode::GlobalAtomicMinF16x2: + return IR::Opcode::StorageAtomicMinF16x2; + case IR::Opcode::GlobalAtomicMaxF16x2: + return IR::Opcode::StorageAtomicMaxF16x2; + case IR::Opcode::GlobalAtomicAddF32x2: + return IR::Opcode::StorageAtomicAddF32x2; + case IR::Opcode::GlobalAtomicMinF32x2: + return IR::Opcode::StorageAtomicMinF32x2; + case IR::Opcode::GlobalAtomicMaxF32x2: + return IR::Opcode::StorageAtomicMaxF32x2; default: throw InvalidArgument("Invalid global memory opcode {}", opcode); } @@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index inst.Invalidate(); } +/// Replace an atomic operation on global memory instruction with its storage buffer equivalent +void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, + const IR::U32& offset) { + const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::Value value{ + &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})}; + inst.ReplaceUsesWith(value); +} + /// Replace a global memory instruction with its storage buffer equivalent void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { @@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); + case IR::Opcode::GlobalAtomicIAdd32: + case IR::Opcode::GlobalAtomicSMin32: + case IR::Opcode::GlobalAtomicUMin32: + case IR::Opcode::GlobalAtomicSMax32: + case IR::Opcode::GlobalAtomicUMax32: + case IR::Opcode::GlobalAtomicInc32: + case IR::Opcode::GlobalAtomicDec32: + case IR::Opcode::GlobalAtomicAnd32: + case IR::Opcode::GlobalAtomicOr32: + case IR::Opcode::GlobalAtomicXor32: + case IR::Opcode::GlobalAtomicExchange32: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicAddF32: + case IR::Opcode::GlobalAtomicAddF16x2: + case IR::Opcode::GlobalAtomicAddF32x2: + case IR::Opcode::GlobalAtomicMinF16x2: + case IR::Opcode::GlobalAtomicMinF32x2: + case IR::Opcode::GlobalAtomicMaxF16x2: + case IR::Opcode::GlobalAtomicMaxF32x2: + return ReplaceAtomic(block, inst, storage_index, offset); default: throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); } @@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { CollectStorageBuffers(*block, inst, info); } } - u32 storage_index{}; for (const StorageBufferAddr& storage_buffer : info.set) { program.info.storage_buffers_descriptors.push_back({ .cbuf_index = storage_buffer.index, @@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .count = 1, .is_written{info.writes.contains(storage_buffer)}, }); - ++storage_index; } for (const StorageInst& storage_inst : info.to_replace) { const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 52576b07f..62e73d52d 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::ConvertF32U32; case IR::Opcode::ConvertF16U64: return IR::Opcode::ConvertF32U64; + case IR::Opcode::GlobalAtomicAddF16x2: + return IR::Opcode::GlobalAtomicAddF32x2; + case IR::Opcode::StorageAtomicAddF16x2: + return IR::Opcode::StorageAtomicAddF32x2; + case IR::Opcode::GlobalAtomicMinF16x2: + return IR::Opcode::GlobalAtomicMinF32x2; + case IR::Opcode::StorageAtomicMinF16x2: + return IR::Opcode::StorageAtomicMinF32x2; + case IR::Opcode::GlobalAtomicMaxF16x2: + return IR::Opcode::GlobalAtomicMaxF32x2; + case IR::Opcode::StorageAtomicMaxF16x2: + return IR::Opcode::StorageAtomicMaxF32x2; default: return op; } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f0d68d516..a4e41bda1 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -38,6 +38,7 @@ struct Profile { bool support_viewport_index_layer_non_geometry{}; bool support_typeless_image_loads{}; bool warp_size_potentially_larger_than_guest{}; + bool support_int64_atomics{}; // FClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 3fbe99268..7bcecf554 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -128,6 +128,19 @@ struct Info { bool uses_subgroup_mask{}; bool uses_fswzadd{}; bool uses_typeless_image_reads{}; + bool uses_shared_increment{}; + bool uses_shared_decrement{}; + bool uses_global_increment{}; + bool uses_global_decrement{}; + bool uses_atomic_f32_add{}; + bool uses_atomic_f16x2_add{}; + bool uses_atomic_f16x2_min{}; + bool uses_atomic_f16x2_max{}; + bool uses_atomic_f32x2_add{}; + bool uses_atomic_f32x2_min{}; + bool uses_atomic_f32x2_max{}; + bool uses_64_bit_atomics{}; + bool uses_shared_memory_u32x2{}; IR::Type used_constant_buffer_types{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f699a9bdf..b953d694b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -637,6 +637,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, device.IsExtShaderViewportIndexLayerSupported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), + .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, .fixed_state_point_size{}, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 78bb741bc..911dfed44 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -681,6 +681,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; + bool has_ext_shader_atomic_int64{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -710,6 +711,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); if (Settings::values.renderer_debug) { @@ -760,6 +762,18 @@ std::vector Device::LoadExtensions(bool requires_surface) { } else { is_warp_potentially_bigger = true; } + if (has_ext_shader_atomic_int64) { + VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; + atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + atomic_int64.pNext = nullptr; + features.pNext = &atomic_int64; + physical.GetFeatures2KHR(features); + + if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) { + extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); + ext_shader_atomic_int64 = true; + } + } if (has_ext_transform_feedback) { VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index adf62a707..4e6d13308 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -229,6 +229,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_KHR_shader_atomic_int64. + bool IsExtShaderAtomicInt64Supported() const { + return ext_shader_atomic_int64; + } + /// Returns true when a known debugging tool is attached. bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -320,6 +325,7 @@ private: bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. + bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached -- cgit v1.2.3 From a33014022ed86c27ba4faa243fa6d0a69df75564 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Apr 2021 20:57:37 -0300 Subject: pipeline_helper: Simplify descriptor objects initialization --- src/video_core/renderer_vulkan/pipeline_helper.h | 58 ++++++++++-------------- 1 file changed, 25 insertions(+), 33 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index a39459b2e..e2167dc4b 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -85,42 +85,34 @@ public: } void Add(const Shader::Info& info, VkShaderStageFlags stage) { - for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage); - } - for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { - Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); - } - for ([[maybe_unused]] const auto& desc : info.texture_buffer_descriptors) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); - } - for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { - Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); - } - for ([[maybe_unused]] const auto& desc : info.image_descriptors) { - Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage); - } + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); } private: - void Add(VkDescriptorType type, VkShaderStageFlags stage) { - bindings.push_back({ - .binding = binding, - .descriptorType = type, - .descriptorCount = 1, - .stageFlags = stage, - .pImmutableSamplers = nullptr, - }); - entries.push_back(VkDescriptorUpdateTemplateEntryKHR{ - .dstBinding = binding, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = type, - .offset = offset, - .stride = sizeof(DescriptorUpdateEntry), - }); - ++binding; - offset += sizeof(DescriptorUpdateEntry); + void Add(VkDescriptorType type, VkShaderStageFlags stage, size_t num) { + for (size_t i = 0; i < num; ++i) { + bindings.push_back({ + .binding = binding, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = stage, + .pImmutableSamplers = nullptr, + }); + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } } const vk::Device* device{}; -- cgit v1.2.3 From f263760c5a3aff771123b32b15677e1f7a089640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 12 Apr 2021 19:41:22 -0300 Subject: shader: Implement geometry shaders --- .../backend/spirv/emit_context.cpp | 43 ++++++++-- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 38 +++++++++ src/shader_recompiler/backend/spirv/emit_spirv.h | 12 +-- .../backend/spirv/emit_spirv_context_get_set.cpp | 93 ++++++++++++---------- .../backend/spirv/emit_spirv_special.cpp | 42 +++++++--- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 20 +++-- src/shader_recompiler/frontend/ir/ir_emitter.h | 6 +- src/shader_recompiler/frontend/ir/opcodes.inc | 8 +- src/shader_recompiler/frontend/ir/program.h | 4 + src/shader_recompiler/frontend/maxwell/program.cpp | 13 ++- .../translate/impl/load_store_attribute.cpp | 16 ++-- src/shader_recompiler/profile.h | 10 +++ .../renderer_vulkan/vk_pipeline_cache.cpp | 56 +++++++++++-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 7 +- 14 files changed, 277 insertions(+), 91 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index bf2210899..01b77a7d1 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -140,7 +140,27 @@ Id DefineVariable(EmitContext& ctx, Id type, std::optional builtin return id; } +u32 NumVertices(InputTopology input_topology) { + switch (input_topology) { + case InputTopology::Points: + return 1; + case InputTopology::Lines: + return 2; + case InputTopology::LinesAdjacency: + return 4; + case InputTopology::Triangles: + return 3; + case InputTopology::TrianglesAdjacency: + return 6; + } + throw InvalidArgument("Invalid input topology {}", input_topology); +} + Id DefineInput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { + if (ctx.stage == Stage::Geometry) { + const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; + type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); + } return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); } @@ -455,12 +475,16 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { void EmitContext::DefineAttributeMemAccess(const Info& info) { const auto make_load{[&] { + const bool is_array{stage == Stage::Geometry}; const Id end_block{OpLabel()}; const Id default_label{OpLabel()}; - const Id func_type_load{TypeFunction(F32[1], U32[1])}; + const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1]) + : TypeFunction(F32[1], U32[1])}; const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)}; const Id offset{OpFunctionParameter(U32[1])}; + const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}}; + AddLabel(); const Id base_index{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))}; @@ -472,7 +496,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); i++) { + for (u32 i = 0; i < info.input_generics.size(); ++i) { if (!info.input_generics[i].used) { continue; } @@ -486,7 +510,10 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { size_t label_index{0}; if (info.loads_position) { AddLabel(labels[label_index]); - const Id result{OpLoad(F32[1], OpAccessChain(input_f32, input_position, masked_index))}; + const Id pointer{is_array + ? OpAccessChain(input_f32, input_position, vertex, masked_index) + : OpAccessChain(input_f32, input_position, masked_index)}; + const Id result{OpLoad(F32[1], pointer)}; OpReturnValue(result); ++label_index; } @@ -502,7 +529,9 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { continue; } const Id generic_id{input_generics.at(i)}; - const Id pointer{OpAccessChain(type->pointer, generic_id, masked_index)}; + const Id pointer{is_array + ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) + : OpAccessChain(type->pointer, generic_id, masked_index)}; const Id value{OpLoad(type->id, pointer)}; const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value}; OpReturnValue(result); @@ -910,13 +939,13 @@ void EmitContext::DefineOutputs(const Info& info) { } if (info.stores_point_size || profile.fixed_state_point_size) { if (stage == Stage::Fragment) { - throw NotImplementedException("Storing PointSize in Fragment stage"); + throw NotImplementedException("Storing PointSize in fragment stage"); } output_point_size = DefineOutput(*this, F32[1], spv::BuiltIn::PointSize); } if (info.stores_clip_distance) { if (stage == Stage::Fragment) { - throw NotImplementedException("Storing PointSize in Fragment stage"); + throw NotImplementedException("Storing ClipDistance in fragment stage"); } const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); @@ -924,7 +953,7 @@ void EmitContext::DefineOutputs(const Info& info) { if (info.stores_viewport_index && (profile.support_viewport_index_layer_non_geometry || stage == Shader::Stage::Geometry)) { if (stage == Stage::Fragment) { - throw NotImplementedException("Storing ViewportIndex in Fragment stage"); + throw NotImplementedException("Storing ViewportIndex in fragment stage"); } viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3258b0cf8..d7c5890ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -134,6 +134,44 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { case Shader::Stage::VertexB: execution_model = spv::ExecutionModel::Vertex; break; + case Shader::Stage::Geometry: + execution_model = spv::ExecutionModel::Geometry; + ctx.AddCapability(spv::Capability::Geometry); + ctx.AddCapability(spv::Capability::GeometryStreams); + switch (ctx.profile.input_topology) { + case InputTopology::Points: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); + break; + case InputTopology::Lines: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines); + break; + case InputTopology::LinesAdjacency: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency); + break; + case InputTopology::Triangles: + ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles); + break; + case InputTopology::TrianglesAdjacency: + ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency); + break; + } + switch (program.output_topology) { + case OutputTopology::PointList: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints); + break; + case OutputTopology::LineStrip: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip); + break; + case OutputTopology::TriangleStrip: + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); + break; + } + if (program.info.stores_point_size) { + ctx.AddCapability(spv::Capability::GeometryPointSize); + } + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); + ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); + break; case Shader::Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 440075212..c0e1b8833 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -34,8 +34,8 @@ void EmitMemoryBarrierDeviceLevel(EmitContext& ctx); void EmitMemoryBarrierSystemLevel(EmitContext& ctx); void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); -void EmitEmitVertex(EmitContext& ctx, Id stream); -void EmitEndPrimitive(EmitContext& ctx, Id stream); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); void EmitGetPred(EmitContext& ctx); @@ -51,10 +51,10 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value); -Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset); -void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitGetZFlag(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d552a1b52..a91b4c212 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -29,6 +30,15 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { throw InvalidArgument("Invalid attribute type {}", type); } +template +Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { + if (ctx.stage == Stage::Geometry) { + return ctx.OpAccessChain(pointer_type, base, vertex, std::forward(args)...); + } else { + return ctx.OpAccessChain(pointer_type, base, std::forward(args)...); + } +} + std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const u32 element{static_cast(attr) % 4}; const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; @@ -66,6 +76,31 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { throw NotImplementedException("Read attribute {}", attr); } } + +Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, + const IR::Value& binding, const IR::Value& offset) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Constant buffer indexing"); + } + const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; + const Id uniform_type{ctx.uniform_types.*member_ptr}; + if (!offset.IsImmediate()) { + Id index{ctx.Def(offset)}; + if (element_size > 1) { + const u32 log2_element_size{static_cast(std::countr_zero(element_size))}; + const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; + index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); + } + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; + return ctx.OpLoad(result_type, access_chain); + } + if (offset.U32() % element_size != 0) { + throw NotImplementedException("Unaligned immediate constant buffer load"); + } + const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; + const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; + return ctx.OpLoad(result_type, access_chain); +} } // Anonymous namespace void EmitGetRegister(EmitContext&) { @@ -100,31 +135,6 @@ void EmitGetIndirectBranchVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -static Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, - u32 element_size, const IR::Value& binding, const IR::Value& offset) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Constant buffer indexing"); - } - const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; - const Id uniform_type{ctx.uniform_types.*member_ptr}; - if (!offset.IsImmediate()) { - Id index{ctx.Def(offset)}; - if (element_size > 1) { - const u32 log2_element_size{static_cast(std::countr_zero(element_size))}; - const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; - index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); - } - const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; - return ctx.OpLoad(result_type, access_chain); - } - if (offset.U32() % element_size != 0) { - throw NotImplementedException("Unaligned immediate constant buffer load"); - } - const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; - const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; - return ctx.OpLoad(result_type, access_chain); -} - Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; return ctx.OpUConvert(ctx.U32[1], load); @@ -157,7 +167,7 @@ Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const u32 element{static_cast(attr) % 4}; const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; if (IR::IsGeneric(attr)) { @@ -168,7 +178,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { return ctx.Constant(ctx.F32[1], 0.0f); } const Id generic_id{ctx.input_generics.at(index)}; - const Id pointer{ctx.OpAccessChain(type->pointer, generic_id, element_id())}; + const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, element_id())}; const Id value{ctx.OpLoad(type->id, pointer)}; return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } @@ -177,8 +187,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionY: case IR::Attribute::PositionZ: case IR::Attribute::PositionW: - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id())); + return ctx.OpLoad( + ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, element_id())); case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpLoad(ctx.U32[1], ctx.instance_id); @@ -198,29 +208,32 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { ctx.Constant(ctx.U32[1], std::numeric_limits::max()), ctx.u32_zero_value); case IR::Attribute::PointSpriteS: - return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, - ctx.Constant(ctx.U32[1], 0U))); + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, + ctx.u32_zero_value)); case IR::Attribute::PointSpriteT: - return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, - ctx.Constant(ctx.U32[1], 1U))); + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, + ctx.Constant(ctx.U32[1], 1U))); default: throw NotImplementedException("Read attribute {}", attr); } } -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) { const std::optional output{OutputAttrPointer(ctx, attr)}; - if (!output) { - return; + if (output) { + ctx.OpStore(*output, value); } - ctx.OpStore(*output, value); } -Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset) { - return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { + if (ctx.stage == Stage::Geometry) { + return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); + } else { + return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); + } } -void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value) { +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) { ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index d20f4def3..6c8fcd5a5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -5,6 +5,17 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { +namespace { +void ConvertDepthMode(EmitContext& ctx) { + const Id type{ctx.F32[1]}; + const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; + const Id z{ctx.OpCompositeExtract(type, position, 2u)}; + const Id w{ctx.OpCompositeExtract(type, position, 3u)}; + const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; + const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; + ctx.OpStore(ctx.output_position, vector); +} +} // Anonymous namespace void EmitPrologue(EmitContext& ctx) { if (ctx.stage == Stage::VertexB) { @@ -25,23 +36,30 @@ void EmitPrologue(EmitContext& ctx) { } void EmitEpilogue(EmitContext& ctx) { - if (ctx.profile.convert_depth_mode) { - const Id type{ctx.F32[1]}; - const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; - const Id z{ctx.OpCompositeExtract(type, position, 2u)}; - const Id w{ctx.OpCompositeExtract(type, position, 3u)}; - const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; - const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; - ctx.OpStore(ctx.output_position, vector); + if (ctx.stage == Stage::VertexB && ctx.profile.convert_depth_mode) { + ConvertDepthMode(ctx); } } -void EmitEmitVertex(EmitContext& ctx, Id stream) { - ctx.OpEmitStreamVertex(stream); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + if (ctx.profile.convert_depth_mode) { + ConvertDepthMode(ctx); + } + if (!stream.IsImmediate()) { + // LOG_WARNING(..., "EmitVertex's stream is not constant"); + ctx.OpEmitStreamVertex(ctx.u32_zero_value); + return; + } + ctx.OpEmitStreamVertex(ctx.Def(stream)); } -void EmitEndPrimitive(EmitContext& ctx, Id stream) { - ctx.OpEndStreamPrimitive(stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + if (!stream.IsImmediate()) { + // LOG_WARNING(..., "EndPrimitive's stream is not constant"); + ctx.OpEndStreamPrimitive(ctx.u32_zero_value); + return; + } + ctx.OpEndStreamPrimitive(ctx.Def(stream)); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 7d48fa1ba..d66eb17a6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -308,19 +308,27 @@ U1 IREmitter::GetFlowTestResult(FlowTest test) { } F32 IREmitter::GetAttribute(IR::Attribute attribute) { - return Inst(Opcode::GetAttribute, attribute); + return GetAttribute(attribute, Imm32(0)); } -void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { - Inst(Opcode::SetAttribute, attribute, value); +F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { + return Inst(Opcode::GetAttribute, attribute, vertex); +} + +void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { + Inst(Opcode::SetAttribute, attribute, value, vertex); } F32 IREmitter::GetAttributeIndexed(const U32& phys_address) { - return Inst(Opcode::GetAttributeIndexed, phys_address); + return GetAttributeIndexed(phys_address, Imm32(0)); +} + +F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) { + return Inst(Opcode::GetAttributeIndexed, phys_address, vertex); } -void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value) { - Inst(Opcode::SetAttributeIndexed, phys_address, value); +void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) { + Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); } void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 033c4332e..e70359eb1 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -77,10 +77,12 @@ public: [[nodiscard]] U1 GetFlowTestResult(FlowTest test); [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); - void SetAttribute(IR::Attribute attribute, const F32& value); + [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); + void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); - void SetAttributeIndexed(const U32& phys_address, const F32& value); + [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); + void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); void SetFragColor(u32 index, u32 component, const F32& value); void SetFragDepth(const F32& value); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 0e487f1a7..7a21fe746 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -44,10 +44,10 @@ OPCODE(GetCbufS16, U32, U32, OPCODE(GetCbufU32, U32, U32, U32, ) OPCODE(GetCbufF32, F32, U32, U32, ) OPCODE(GetCbufU32x2, U32x2, U32, U32, ) -OPCODE(GetAttribute, F32, Attribute, ) -OPCODE(SetAttribute, Void, Attribute, F32, ) -OPCODE(GetAttributeIndexed, F32, U32, ) -OPCODE(SetAttributeIndexed, Void, U32, F32, ) +OPCODE(GetAttribute, F32, Attribute, U32, ) +OPCODE(SetAttribute, Void, Attribute, F32, U32, ) +OPCODE(GetAttributeIndexed, F32, U32, U32, ) +OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) OPCODE(SetFragColor, Void, U32, U32, F32, ) OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 3a37b3ab9..51e1a8c77 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -10,6 +10,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/program_header.h" #include "shader_recompiler/shader_info.h" #include "shader_recompiler/stage.h" @@ -21,6 +22,9 @@ struct Program { Info info; Stage stage{}; std::array workgroup_size{}; + OutputTopology output_topology{}; + u32 output_vertices{}; + u32 invocations{}; u32 local_memory_size{}; u32 shared_memory_size{}; }; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index aaf2a74a7..ab67446c8 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,9 +69,20 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool index_reg; BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; - BitField<39, 8, IR::Reg> array_reg; + BitField<39, 8, IR::Reg> vertex_reg; BitField<32, 1, u64> o; BitField<31, 1, u64> patch; BitField<47, 2, Size> size; @@ -80,15 +80,17 @@ void TranslatorVisitor::ALD(u64 insn) { if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); } + const IR::U32 vertex{X(ald.vertex_reg)}; const u32 num_elements{NumElements(ald.size)}; if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); } return; } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); + F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); }); } @@ -100,7 +102,7 @@ void TranslatorVisitor::AST(u64 insn) { BitField<20, 10, u64> absolute_offset; BitField<20, 11, s64> relative_offset; BitField<31, 1, u64> patch; - BitField<39, 8, IR::Reg> array_reg; + BitField<39, 8, IR::Reg> vertex_reg; BitField<47, 2, Size> size; } const ast{insn}; @@ -114,15 +116,17 @@ void TranslatorVisitor::AST(u64 insn) { if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); } + const IR::U32 vertex{X(ast.vertex_reg)}; const u32 num_elements{NumElements(ast.size)}; if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + element), vertex); } return; } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); }); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index a4e41bda1..06f1f59bd 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -18,6 +18,14 @@ enum class AttributeType : u8 { Disabled, }; +enum class InputTopology { + Points, + Lines, + LinesAdjacency, + Triangles, + TrianglesAdjacency, +}; + struct Profile { u32 supported_spirv{0x00010000}; @@ -46,6 +54,8 @@ struct Profile { std::array generic_input_types{}; bool convert_depth_mode{}; + InputTopology input_topology{}; + std::optional fixed_state_point_size; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b953d694b..f49add208 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -769,7 +769,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::Profile profile{MakeProfile(key, program.stage)}; + const Shader::Profile profile{MakeProfile(key, program)}; const std::vector code{EmitSPIRV(profile, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); @@ -880,15 +880,59 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA } Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, - Shader::Stage stage) { + const Shader::IR::Program& program) { Shader::Profile profile{base_profile}; - if (stage == Shader::Stage::VertexB) { - profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; - if (key.state.topology == Maxwell::PrimitiveTopology::Points) { - profile.fixed_state_point_size = Common::BitCast(key.state.point_size); + + const Shader::Stage stage{program.stage}; + const bool has_geometry{key.unique_hashes[4] != u128{}}; + const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; + const float point_size{Common::BitCast(key.state.point_size)}; + switch (stage) { + case Shader::Stage::VertexB: + if (!has_geometry) { + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + profile.fixed_state_point_size = point_size; + } + profile.convert_depth_mode = gl_ndc; } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); + break; + case Shader::Stage::Geometry: + if (program.output_topology == Shader::OutputTopology::PointList) { + profile.fixed_state_point_size = point_size; + } + profile.convert_depth_mode = gl_ndc; + break; + default: + break; + } + switch (key.state.topology) { + case Maxwell::PrimitiveTopology::Points: + profile.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + profile.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + profile.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + profile.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + profile.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; } return profile; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 343ea1554..8b6839966 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -33,6 +33,10 @@ namespace Core { class System; } +namespace Shader::IR { +struct Program; +} + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -160,7 +164,8 @@ private: Shader::Environment& env, bool build_in_parallel); - Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); + Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, + const Shader::IR::Program& program); Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; -- cgit v1.2.3 From fa75b9b0626c8e118e27207dd1e82e2f415fc0bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 13 Apr 2021 05:32:21 -0300 Subject: spirv: Rework storage buffers and shader memory --- .../backend/spirv/emit_context.cpp | 440 ++++++++++++--------- src/shader_recompiler/backend/spirv/emit_context.h | 49 ++- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 20 +- .../backend/spirv/emit_spirv_atomic.cpp | 333 ++++++---------- .../backend/spirv/emit_spirv_memory.cpp | 135 +++---- .../ir_opt/collect_shader_info_pass.cpp | 69 +++- src/shader_recompiler/shader_info.h | 4 +- src/video_core/vulkan_common/vulkan_device.cpp | 29 +- 9 files changed, 581 insertions(+), 500 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 01b77a7d1..df53e58a8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -15,7 +15,7 @@ namespace Shader::Backend::SPIRV { namespace { -enum class CasFunctionType { +enum class Operation { Increment, Decrement, FPAdd, @@ -23,44 +23,11 @@ enum class CasFunctionType { FPMax, }; -Id CasFunction(EmitContext& ctx, CasFunctionType function_type, Id value_type) { - const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; - const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; - const Id op_a{ctx.OpFunctionParameter(value_type)}; - const Id op_b{ctx.OpFunctionParameter(value_type)}; - ctx.AddLabel(); - Id result{}; - switch (function_type) { - case CasFunctionType::Increment: { - const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; - const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; - result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); - break; - } - case CasFunctionType::Decrement: { - const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; - const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; - const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; - const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; - result = ctx.OpSelect(value_type, pred, op_b, decr); - break; - } - case CasFunctionType::FPAdd: - result = ctx.OpFAdd(value_type, op_a, op_b); - break; - case CasFunctionType::FPMin: - result = ctx.OpFMin(value_type, op_a, op_b); - break; - case CasFunctionType::FPMax: - result = ctx.OpFMax(value_type, op_a, op_b); - break; - default: - break; - } - ctx.OpReturnValue(result); - ctx.OpFunctionEnd(); - return func; -} +struct AttrInfo { + Id pointer; + Id id; + bool needs_cast; +}; Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { const spv::ImageFormat format{spv::ImageFormat::Unknown}; @@ -182,12 +149,6 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) { throw InvalidArgument("Invalid attribute type {}", type); } -struct AttrInfo { - Id pointer; - Id id; - bool needs_cast; -}; - std::optional AttrTypes(EmitContext& ctx, u32 index) { const AttributeType type{ctx.profile.generic_input_types.at(index)}; switch (type) { @@ -203,6 +164,164 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { throw InvalidArgument("Invalid attribute type {}", type); } +void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, + u32 binding, Id type, char type_char, u32 element_size) { + const Id array_type{ctx.TypeArray(type, ctx.Constant(ctx.U32[1], 65536U / element_size))}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size); + + const Id struct_type{ctx.TypeStruct(array_type)}; + ctx.Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberName(struct_type, 0, "data"); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)}; + const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)}; + ctx.uniform_types.*member_type = uniform_type; + + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("c{}", desc.index)); + for (size_t i = 0; i < desc.count; ++i) { + ctx.cbufs[desc.index + i].*member_type = id; + } + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + binding += desc.count; + } +} + +void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type, + u32 stride) { + const Id array_type{ctx.TypeRuntimeArray(type)}; + ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride); + + const Id struct_type{ctx.TypeStruct(array_type)}; + ctx.Decorate(struct_type, spv::Decoration::Block); + ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + type_def.array = struct_pointer; + type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type); + + u32 index{}; + for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)}; + ctx.Decorate(id, spv::Decoration::Binding, binding); + ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); + ctx.Name(id, fmt::format("ssbo{}", index)); + if (ctx.profile.supported_spirv >= 0x00010400) { + ctx.interfaces.push_back(id); + } + for (size_t i = 0; i < desc.count; ++i) { + ctx.ssbos[index + i].*member_type = id; + } + index += desc.count; + binding += desc.count; + } +} + +Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) { + const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id op_a{ctx.OpFunctionParameter(value_type)}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + ctx.AddLabel(); + Id result{}; + switch (operation) { + case Operation::Increment: { + const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; + const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); + break; + } + case Operation::Decrement: { + const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; + const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; + const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; + const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; + result = ctx.OpSelect(value_type, pred, op_b, decr); + break; + } + case Operation::FPAdd: + result = ctx.OpFAdd(value_type, op_a, op_b); + break; + case Operation::FPMin: + result = ctx.OpFMin(value_type, op_a, op_b); + break; + case Operation::FPMax: + result = ctx.OpFMax(value_type, op_a, op_b); + break; + default: + break; + } + ctx.OpReturnValue(result); + ctx.OpFunctionEnd(); + return func; +} + +Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer, + Id value_type, Id memory_type, spv::Scope scope) { + const bool is_shared{scope == spv::Scope::Workgroup}; + const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout}; + const Id cas_func{CasFunction(ctx, operation, value_type)}; + const Id zero{ctx.u32_zero_value}; + const Id scope_id{ctx.Constant(ctx.U32[1], static_cast(scope))}; + + const Id loop_header{ctx.OpLabel()}; + const Id continue_block{ctx.OpLabel()}; + const Id merge_block{ctx.OpLabel()}; + const Id func_type{is_shared + ? ctx.TypeFunction(value_type, ctx.U32[1], value_type) + : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)}; + + const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; + const Id index{ctx.OpFunctionParameter(ctx.U32[1])}; + const Id op_b{ctx.OpFunctionParameter(value_type)}; + const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)}; + ctx.AddLabel(); + ctx.OpBranch(loop_header); + ctx.AddLabel(loop_header); + + ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); + ctx.OpBranch(continue_block); + + ctx.AddLabel(continue_block); + const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index) + : ctx.OpAccessChain(element_pointer, base, index)}; + if (value_type.value == ctx.F32[2].value) { + const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)}; + const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)}; + const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)}; + const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, u32_new_value, u32_value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res)); + } else { + const Id value{ctx.OpLoad(memory_type, word_pointer)}; + const bool matching_type{value_type.value == memory_type.value}; + const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)}; + const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)}; + const Id new_value{matching_type ? cal_res : ctx.OpBitcast(memory_type, cal_res)}; + const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, + zero, new_value, value)}; + const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)}; + ctx.OpBranchConditional(success, merge_block, loop_header); + + ctx.AddLabel(merge_block); + ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res)); + } + ctx.OpFunctionEnd(); + return func; +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -226,6 +345,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineInterfaces(program.info); DefineLocalMemory(program); DefineSharedMemory(program); + DefineSharedMemoryFunctions(program); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextureBuffers(program.info, binding); @@ -263,56 +383,6 @@ Id EmitContext::Def(const IR::Value& value) { } } -Id EmitContext::CasLoop(Id function, CasPointerType pointer_type, Id value_type) { - const Id loop_header{OpLabel()}; - const Id continue_block{OpLabel()}; - const Id merge_block{OpLabel()}; - const Id storage_type{pointer_type == CasPointerType::Shared ? shared_memory_u32_type - : storage_memory_u32}; - const Id func_type{TypeFunction(value_type, U32[1], value_type, storage_type)}; - const Id func{OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; - const Id index{OpFunctionParameter(U32[1])}; - const Id op_b{OpFunctionParameter(value_type)}; - const Id base{OpFunctionParameter(storage_type)}; - AddLabel(); - const Id one{Constant(U32[1], 1)}; - OpBranch(loop_header); - AddLabel(loop_header); - OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); - OpBranch(continue_block); - - AddLabel(continue_block); - const Id word_pointer{pointer_type == CasPointerType::Shared - ? OpAccessChain(shared_u32, base, index) - : OpAccessChain(storage_u32, base, u32_zero_value, index)}; - if (value_type.value == F32[2].value) { - const Id u32_value{OpLoad(U32[1], word_pointer)}; - const Id value{OpUnpackHalf2x16(F32[2], u32_value)}; - const Id new_value{OpFunctionCall(value_type, function, value, op_b)}; - const Id u32_new_value{OpPackHalf2x16(U32[1], new_value)}; - const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, - u32_zero_value, u32_new_value, u32_value)}; - const Id success{OpIEqual(U1, atomic_res, u32_value)}; - OpBranchConditional(success, merge_block, loop_header); - - AddLabel(merge_block); - OpReturnValue(OpUnpackHalf2x16(F32[2], atomic_res)); - } else { - const Id value{OpLoad(U32[1], word_pointer)}; - const Id new_value{OpBitcast( - U32[1], OpFunctionCall(value_type, function, OpBitcast(value_type, value), op_b))}; - const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, - u32_zero_value, new_value, value)}; - const Id success{OpIEqual(U1, atomic_res, value)}; - OpBranchConditional(success, merge_block, loop_header); - - AddLabel(merge_block); - OpReturnValue(OpBitcast(value_type, atomic_res)); - } - OpFunctionEnd(); - return func; -} - void EmitContext::DefineCommonTypes(const Info& info) { void_id = TypeVoid(); @@ -397,27 +467,31 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { Decorate(variable, spv::Decoration::Aliased); interfaces.push_back(variable); - return std::make_pair(variable, element_pointer); + return std::make_tuple(variable, element_pointer, pointer); }}; if (profile.support_explicit_workgroup_layout) { AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); if (program.info.uses_int8) { AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); - std::tie(shared_memory_u8, shared_u8) = make(U8, 1); + std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); } if (program.info.uses_int16) { AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); - std::tie(shared_memory_u16, shared_u16) = make(U16, 2); + std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); + } + if (program.info.uses_int64) { + std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8); } - std::tie(shared_memory_u32, shared_u32) = make(U32[1], 4); - std::tie(shared_memory_u32x2, shared_u32x2) = make(U32[2], 8); - std::tie(shared_memory_u32x4, shared_u32x4) = make(U32[4], 16); + std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); + std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); + std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); return; } const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); + shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); interfaces.push_back(shared_memory_u32); @@ -463,13 +537,16 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { if (program.info.uses_int16) { shared_store_u16_func = make_function(16, 16); } +} + +void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) { if (program.info.uses_shared_increment) { - const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; - increment_cas_shared = CasLoop(inc_func, CasPointerType::Shared, U32[1]); + increment_cas_shared = CasLoop(*this, Operation::Increment, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); } if (program.info.uses_shared_decrement) { - const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; - decrement_cas_shared = CasLoop(dec_func, CasPointerType::Shared, U32[1]); + decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type, + shared_u32, U32[1], U32[1], spv::Scope::Workgroup); } } @@ -628,21 +705,24 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { return; } if (True(info.used_constant_buffer_types & IR::Type::U8)) { - DefineConstantBuffers(info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); - DefineConstantBuffers(info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); + DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); + DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); } if (True(info.used_constant_buffer_types & IR::Type::U16)) { - DefineConstantBuffers(info, &UniformDefinitions::U16, binding, U16, 'u', sizeof(u16)); - DefineConstantBuffers(info, &UniformDefinitions::S16, binding, S16, 's', sizeof(s16)); + DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', sizeof(u16)); + DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', sizeof(s16)); } if (True(info.used_constant_buffer_types & IR::Type::U32)) { - DefineConstantBuffers(info, &UniformDefinitions::U32, binding, U32[1], 'u', sizeof(u32)); + DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', + sizeof(u32)); } if (True(info.used_constant_buffer_types & IR::Type::F32)) { - DefineConstantBuffers(info, &UniformDefinitions::F32, binding, F32[1], 'f', sizeof(f32)); + DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', + sizeof(f32)); } if (True(info.used_constant_buffer_types & IR::Type::U32x2)) { - DefineConstantBuffers(info, &UniformDefinitions::U32x2, binding, U32[2], 'u', sizeof(u64)); + DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', + sizeof(u32[2])); } for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { binding += desc.count; @@ -655,75 +735,83 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { } AddExtension("SPV_KHR_storage_buffer_storage_class"); - const Id array_type{TypeRuntimeArray(U32[1])}; - Decorate(array_type, spv::Decoration::ArrayStride, 4U); - - const Id struct_type{TypeStruct(array_type)}; - Name(struct_type, "ssbo_block"); - Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, 0, "data"); - MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - - storage_memory_u32 = TypePointer(spv::StorageClass::StorageBuffer, struct_type); - storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); - - u32 index{}; + if (True(info.used_storage_buffer_types & IR::Type::U8)) { + DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8, + sizeof(u8)); + DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8, + sizeof(u8)); + } + if (True(info.used_storage_buffer_types & IR::Type::U16)) { + DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16, + sizeof(u16)); + DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16, + sizeof(u16)); + } + if (True(info.used_storage_buffer_types & IR::Type::U32)) { + DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1], + sizeof(u32)); + } + if (True(info.used_storage_buffer_types & IR::Type::F32)) { + DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1], + sizeof(f32)); + } + if (True(info.used_storage_buffer_types & IR::Type::U64)) { + DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64, + sizeof(u64)); + } + if (True(info.used_storage_buffer_types & IR::Type::U32x2)) { + DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2], + sizeof(u32[2])); + } + if (True(info.used_storage_buffer_types & IR::Type::U32x4)) { + DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4], + sizeof(u32[4])); + } for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { - const Id id{AddGlobalVariable(storage_memory_u32, spv::StorageClass::StorageBuffer)}; - Decorate(id, spv::Decoration::Binding, binding); - Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("ssbo{}", index)); - if (profile.supported_spirv >= 0x00010400) { - interfaces.push_back(id); - } - std::fill_n(ssbos.data() + index, desc.count, id); - index += desc.count; binding += desc.count; } - if (info.uses_global_increment) { + const bool needs_function{ + info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add || + info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max || + info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max}; + if (needs_function) { AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; - increment_cas_ssbo = CasLoop(inc_func, CasPointerType::Ssbo, U32[1]); + } + if (info.uses_global_increment) { + increment_cas_ssbo = CasLoop(*this, Operation::Increment, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); } if (info.uses_global_decrement) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; - decrement_cas_ssbo = CasLoop(dec_func, CasPointerType::Ssbo, U32[1]); + decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array, + storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); } if (info.uses_atomic_f32_add) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[1])}; - f32_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[1]); + f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[1], U32[1], spv::Scope::Device); } if (info.uses_atomic_f16x2_add) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F16[2])}; - f16x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F16[2]); + f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); } if (info.uses_atomic_f16x2_min) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMin, F16[2])}; - f16x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); } if (info.uses_atomic_f16x2_max) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMax, F16[2])}; - f16x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); + f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); } if (info.uses_atomic_f32x2_add) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[2])}; - f32x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[2]); + f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); } if (info.uses_atomic_f32x2_min) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMin, F32[2])}; - f32x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); } if (info.uses_atomic_f32x2_max) { - AddCapability(spv::Capability::VariablePointersStorageBuffer); - const Id func{CasFunction(*this, CasFunctionType::FPMax, F32[2])}; - f32x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); + f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, + storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); } } @@ -903,36 +991,6 @@ void EmitContext::DefineInputs(const Info& info) { } } -void EmitContext::DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, - u32 binding, Id type, char type_char, u32 element_size) { - const Id array_type{TypeArray(type, Constant(U32[1], 65536U / element_size))}; - Decorate(array_type, spv::Decoration::ArrayStride, element_size); - - const Id struct_type{TypeStruct(array_type)}; - Name(struct_type, fmt::format("cbuf_block_{}{}", type_char, element_size * CHAR_BIT)); - Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, 0, "data"); - MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - - const Id struct_pointer_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; - const Id uniform_type{TypePointer(spv::StorageClass::Uniform, type)}; - uniform_types.*member_type = uniform_type; - - for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { - const Id id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; - Decorate(id, spv::Decoration::Binding, binding); - Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("c{}", desc.index)); - for (size_t i = 0; i < desc.count; ++i) { - cbufs[desc.index + i].*member_type = id; - } - if (profile.supported_spirv >= 0x00010400) { - interfaces.push_back(id); - } - binding += desc.count; - } -} - void EmitContext::DefineOutputs(const Info& info) { if (info.stores_position || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 98a9140bf..cade1fa0d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -50,6 +50,35 @@ struct UniformDefinitions { Id U32x2{}; }; +struct StorageTypeDefinition { + Id array{}; + Id element{}; +}; + +struct StorageTypeDefinitions { + StorageTypeDefinition U8{}; + StorageTypeDefinition S8{}; + StorageTypeDefinition U16{}; + StorageTypeDefinition S16{}; + StorageTypeDefinition U32{}; + StorageTypeDefinition U64{}; + StorageTypeDefinition F32{}; + StorageTypeDefinition U32x2{}; + StorageTypeDefinition U32x4{}; +}; + +struct StorageDefinitions { + Id U8{}; + Id S8{}; + Id U16{}; + Id S16{}; + Id U32{}; + Id F32{}; + Id U64{}; + Id U32x2{}; + Id U32x4{}; +}; + class EmitContext final : public Sirit::Module { public: explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); @@ -78,12 +107,14 @@ public: Id f32_zero_value{}; UniformDefinitions uniform_types; + StorageTypeDefinitions storage_types; Id private_u32{}; Id shared_u8{}; Id shared_u16{}; Id shared_u32{}; + Id shared_u64{}; Id shared_u32x2{}; Id shared_u32x4{}; @@ -93,14 +124,11 @@ public: Id output_f32{}; - Id storage_u32{}; - Id storage_memory_u32{}; - Id image_buffer_type{}; Id sampled_texture_buffer_type{}; std::array cbufs{}; - std::array ssbos{}; + std::array ssbos{}; std::vector texture_buffers; std::vector textures; std::vector images; @@ -136,8 +164,10 @@ public: Id shared_memory_u8{}; Id shared_memory_u16{}; Id shared_memory_u32{}; + Id shared_memory_u64{}; Id shared_memory_u32x2{}; Id shared_memory_u32x4{}; + Id shared_memory_u32_type{}; Id shared_store_u8_func{}; @@ -167,16 +197,12 @@ public: std::vector interfaces; private: - enum class CasPointerType { - Shared, - Ssbo, - }; - void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineInterfaces(const Info& info); void DefineLocalMemory(const IR::Program& program); void DefineSharedMemory(const IR::Program& program); + void DefineSharedMemoryFunctions(const IR::Program& program); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextureBuffers(const Info& info, u32& binding); @@ -185,13 +211,8 @@ private: void DefineAttributeMemAccess(const Info& info); void DefineLabels(IR::Program& program); - void DefineConstantBuffers(const Info& info, Id UniformDefinitions::*member_type, u32 binding, - Id type, char type_char, u32 element_size); - void DefineInputs(const Info& info); void DefineOutputs(const Info& info); - - [[nodiscard]] Id CasLoop(Id function, CasPointerType pointer_type, Id value_type); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index d7c5890ab..61a2018d7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -276,7 +276,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddCapability(spv::Capability::SubgroupVoteKHR); } } - if (info.uses_64_bit_atomics && profile.support_int64_atomics) { + if (info.uses_int64_bit_atomics && profile.support_int64_atomics) { ctx.AddCapability(spv::Capability::Int64Atomics); } if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index c0e1b8833..55b2edba0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -89,17 +89,21 @@ void EmitWriteGlobalS16(EmitContext& ctx); void EmitWriteGlobal32(EmitContext& ctx); void EmitWriteGlobal64(EmitContext& ctx); void EmitWriteGlobal128(EmitContext& ctx); -void EmitLoadStorageU8(EmitContext& ctx); -void EmitLoadStorageS8(EmitContext& ctx); -void EmitLoadStorageU16(EmitContext& ctx); -void EmitLoadStorageS16(EmitContext& ctx); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitWriteStorageU8(EmitContext& ctx); -void EmitWriteStorageS8(EmitContext& ctx); -void EmitWriteStorageU16(EmitContext& ctx); -void EmitWriteStorageS16(EmitContext& ctx); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 03d891419..aab32dc52 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -6,11 +6,12 @@ namespace Shader::Backend::SPIRV { namespace { - -Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { +Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; - const Id shifted_value{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - const Id index{ctx.OpIAdd(ctx.U32[1], shifted_value, ctx.Constant(ctx.U32[1], index_offset))}; + Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + if (index_offset > 0) { + index = ctx.OpIAdd(ctx.U32[1], index, ctx.Constant(ctx.U32[1], index_offset)); + } return ctx.profile.support_explicit_workgroup_layout ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); @@ -30,340 +31,258 @@ Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } -Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - u32 index_offset = 0) { - // TODO: Support reinterpreting bindings, guaranteed to be aligned +Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def, + Id StorageDefinitions::*member_ptr, const IR::Value& binding, + const IR::Value& offset, size_t element_size) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; - const Id index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], index_offset))}; - return ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index); + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); } -std::pair GetAtomicArgs(EmitContext& ctx) { +std::pair AtomicArgs(EmitContext& ctx) { const Id scope{ctx.Constant(ctx.U32[1], static_cast(spv::Scope::Device))}; const Id semantics{ctx.u32_zero_value}; return {scope, semantics}; } -Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) { - const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; - const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; - const Id original_composite{ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)}; - return ctx.OpBitcast(ctx.U64, original_composite); +Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{SharedPointer(ctx, offset)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); } -void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) { - const Id composite{ctx.OpBitcast(ctx.U32[2], result)}; - ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], composite, 0)); - ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], composite, 1)); +Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding, + offset, sizeof(u32))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); +} + +Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id), + Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { + if (ctx.profile.support_int64_atomics) { + const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value); + } + // LOG_WARNING(..., "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; + const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)}; + ctx.OpStore(pointer, result); + return original_value; } } // Anonymous namespace -Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd); } -Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); } -Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin); } -Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax); } -Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); } -Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) { +Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) { const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; - const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; - return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value, - ctx.shared_memory_u32); + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value); } -Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) { +Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) { const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; - const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; - return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value, - ctx.shared_memory_u32); + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value); } -Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd); } -Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr); } -Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); } -Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer{GetSharedPointer(ctx, pointer_offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange); } -Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) { - const Id pointer_1{GetSharedPointer(ctx, pointer_offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { + if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) { + const Id shift_id{ctx.Constant(ctx.U32[1], 3U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ + ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetSharedPointer(ctx, pointer_offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - StoreResult(ctx, pointer_1, pointer_2, value); - return original_value; + // LOG_WARNING("Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer_1{SharedPointer(ctx, offset, 0)}; + const Id pointer_2{SharedPointer(ctx, offset, 1)}; + const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; + const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; + const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)}; + ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U)); + ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U)); + return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)); } Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd); } Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin); } Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin); } Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax); } Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax); } Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); } Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); } Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd); } Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr); } Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor); } Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer{GetStoragePointer(ctx, binding, offset)}; - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); + return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange); } Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicIAdd(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpIAdd(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd, + &Sirit::Module::OpIAdd); } Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMin(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpSMin(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin, + &Sirit::Module::OpSMin); } Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMin(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpUMin(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin, + &Sirit::Module::OpUMin); } Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicSMax(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpSMax(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax, + &Sirit::Module::OpSMax); } Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicUMax(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpUMax(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax, + &Sirit::Module::OpUMax); } Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicAnd(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpBitwiseAnd(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd, + &Sirit::Module::OpBitwiseAnd); } Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicOr(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpBitwiseOr(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr, + &Sirit::Module::OpBitwiseOr); } Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; - if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicXor(ctx.U64, pointer_1, scope, semantics, value); - } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - const Id result{ctx.OpBitwiseXor(ctx.U64, value, original_value)}; - StoreResult(ctx, pointer_1, pointer_2, result); - return original_value; + return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor, + &Sirit::Module::OpBitwiseXor); } Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; if (ctx.profile.support_int64_atomics) { - const auto [scope, semantics]{GetAtomicArgs(ctx)}; - return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, + binding, offset, sizeof(u64))}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); - const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; - const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; - StoreResult(ctx, pointer_1, pointer_2, value); - return original_value; + // LOG_WARNING(..., "Int64 Atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; + ctx.OpStore(pointer, value); + return original; } Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); } Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; return ctx.OpBitcast(ctx.U32[1], result); @@ -371,7 +290,7 @@ Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; return ctx.OpPackHalf2x16(ctx.U32[1], result); @@ -379,7 +298,7 @@ Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; return ctx.OpBitcast(ctx.U32[1], result); @@ -387,7 +306,7 @@ Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; return ctx.OpPackHalf2x16(ctx.U32[1], result); @@ -395,7 +314,7 @@ Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; return ctx.OpBitcast(ctx.U32[1], result); @@ -403,7 +322,7 @@ Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const I Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id ssbo{ctx.ssbos[binding.U32()].U32}; const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; return ctx.OpPackHalf2x16(ctx.U32[1], result); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 088bd3059..a8f2ea5a0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -22,29 +22,29 @@ Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } -Id EmitLoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - u32 num_components) { - // TODO: Support reinterpreting bindings, guaranteed to be aligned +Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; - std::array components; - for (u32 element = 0; element < num_components; ++element) { - Id index{base_index}; - if (element > 0) { - index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); - } - const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; - components[element] = ctx.OpLoad(ctx.U32[1], pointer); - } - if (num_components == 1) { - return components[0]; - } else { - const std::span components_span(components.data(), num_components); - return ctx.OpCompositeConstruct(ctx.U32[num_components], components_span); - } + const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; + const Id index{StorageIndex(ctx, offset, element_size)}; + return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); +} + +Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr) { + const Id pointer{StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr)}; + return ctx.OpLoad(result_type, pointer); +} + +void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + const StorageTypeDefinition& type_def, size_t element_size, + Id StorageDefinitions::*member_ptr) { + const Id pointer{StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr)}; + ctx.OpStore(pointer, value); } } // Anonymous namespace @@ -104,92 +104,85 @@ void EmitWriteGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitLoadStorageU8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8)); } -void EmitLoadStorageS8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8)); } -void EmitLoadStorageU16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpUConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16)); } -void EmitLoadStorageS16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return ctx.OpSConvert(ctx.U32[1], + LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16)); } Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return EmitLoadStorage(ctx, binding, offset, 1); + return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32); } Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return EmitLoadStorage(ctx, binding, offset, 2); + return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2, sizeof(u32[2]), + &StorageDefinitions::U32x2); } Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return EmitLoadStorage(ctx, binding, offset, 4); + return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4, sizeof(u32[4]), + &StorageDefinitions::U32x4); } -void EmitWriteStorageU8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8); } -void EmitWriteStorageS8(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8); } -void EmitWriteStorageU16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16); } -void EmitWriteStorageS16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16); } void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Dynamic storage buffer indexing"); - } - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id index{StorageIndex(ctx, offset, sizeof(u32))}; - const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; - ctx.OpStore(pointer, value); + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Dynamic storage buffer indexing"); - } - // TODO: Support reinterpreting bindings, guaranteed to be aligned - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id low_index{StorageIndex(ctx, offset, sizeof(u32))}; - const Id high_index{ctx.OpIAdd(ctx.U32[1], low_index, ctx.Constant(ctx.U32[1], 1U))}; - const Id low_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, low_index)}; - const Id high_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, high_index)}; - ctx.OpStore(low_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); - ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]), + &StorageDefinitions::U32x2); } void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - if (!binding.IsImmediate()) { - throw NotImplementedException("Dynamic storage buffer indexing"); - } - // TODO: Support reinterpreting bindings, guaranteed to be aligned - const Id ssbo{ctx.ssbos[binding.U32()]}; - const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; - for (u32 element = 0; element < 4; ++element) { - Id index = base_index; - if (element > 0) { - index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element)); - } - const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; - ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, element)); - } + WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]), + &StorageDefinitions::U32x4); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index ab529e86d..116d93c1c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -315,6 +315,23 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::ConvertF32U64: case IR::Opcode::ConvertF64U64: case IR::Opcode::SharedAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd64: + case IR::Opcode::GlobalAtomicSMin64: + case IR::Opcode::GlobalAtomicUMin64: + case IR::Opcode::GlobalAtomicSMax64: + case IR::Opcode::GlobalAtomicUMax64: + case IR::Opcode::GlobalAtomicAnd64: + case IR::Opcode::GlobalAtomicOr64: + case IR::Opcode::GlobalAtomicXor64: + case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::StorageAtomicIAdd64: + case IR::Opcode::StorageAtomicSMin64: + case IR::Opcode::StorageAtomicUMin64: + case IR::Opcode::StorageAtomicSMax64: + case IR::Opcode::StorageAtomicUMax64: + case IR::Opcode::StorageAtomicAnd64: + case IR::Opcode::StorageAtomicOr64: + case IR::Opcode::StorageAtomicXor64: info.uses_int64 = true; break; default: @@ -457,46 +474,91 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FSwizzleAdd: info.uses_fswzadd = true; break; + case IR::Opcode::LoadStorageU8: + case IR::Opcode::LoadStorageS8: + case IR::Opcode::WriteStorageU8: + case IR::Opcode::WriteStorageS8: + info.used_storage_buffer_types |= IR::Type::U8; + break; + case IR::Opcode::LoadStorageU16: + case IR::Opcode::LoadStorageS16: + case IR::Opcode::WriteStorageU16: + case IR::Opcode::WriteStorageS16: + info.used_storage_buffer_types |= IR::Type::U16; + break; + case IR::Opcode::LoadStorage32: + case IR::Opcode::WriteStorage32: + case IR::Opcode::StorageAtomicIAdd32: + case IR::Opcode::StorageAtomicSMin32: + case IR::Opcode::StorageAtomicUMin32: + case IR::Opcode::StorageAtomicSMax32: + case IR::Opcode::StorageAtomicUMax32: + case IR::Opcode::StorageAtomicAnd32: + case IR::Opcode::StorageAtomicOr32: + case IR::Opcode::StorageAtomicXor32: + case IR::Opcode::StorageAtomicExchange32: + info.used_storage_buffer_types |= IR::Type::U32; + break; + case IR::Opcode::LoadStorage64: + case IR::Opcode::WriteStorage64: + info.used_storage_buffer_types |= IR::Type::U32x2; + break; + case IR::Opcode::LoadStorage128: + case IR::Opcode::WriteStorage128: + info.used_storage_buffer_types |= IR::Type::U32x4; + break; case IR::Opcode::SharedAtomicInc32: info.uses_shared_increment = true; break; case IR::Opcode::SharedAtomicDec32: info.uses_shared_decrement = true; break; + case IR::Opcode::SharedAtomicExchange64: + info.uses_int64_bit_atomics = true; + break; case IR::Opcode::GlobalAtomicInc32: case IR::Opcode::StorageAtomicInc32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_global_increment = true; break; case IR::Opcode::GlobalAtomicDec32: case IR::Opcode::StorageAtomicDec32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_global_decrement = true; break; case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::StorageAtomicAddF32: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32_add = true; break; case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::StorageAtomicAddF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_add = true; break; case IR::Opcode::GlobalAtomicAddF32x2: case IR::Opcode::StorageAtomicAddF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_add = true; break; case IR::Opcode::GlobalAtomicMinF16x2: case IR::Opcode::StorageAtomicMinF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_min = true; break; case IR::Opcode::GlobalAtomicMinF32x2: case IR::Opcode::StorageAtomicMinF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_min = true; break; case IR::Opcode::GlobalAtomicMaxF16x2: case IR::Opcode::StorageAtomicMaxF16x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f16x2_max = true; break; case IR::Opcode::GlobalAtomicMaxF32x2: case IR::Opcode::StorageAtomicMaxF32x2: + info.used_storage_buffer_types |= IR::Type::U32; info.uses_atomic_f32x2_max = true; break; case IR::Opcode::GlobalAtomicIAdd64: @@ -516,11 +578,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::StorageAtomicAnd64: case IR::Opcode::StorageAtomicOr64: case IR::Opcode::StorageAtomicXor64: - info.uses_64_bit_atomics = true; - break; - case IR::Opcode::SharedAtomicExchange64: - info.uses_64_bit_atomics = true; - info.uses_shared_memory_u32x2 = true; + info.used_storage_buffer_types |= IR::Type::U64; + info.uses_int64_bit_atomics = true; break; default: break; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 6a51aabb5..15cf09c3d 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -141,10 +141,10 @@ struct Info { bool uses_atomic_f32x2_add{}; bool uses_atomic_f32x2_min{}; bool uses_atomic_f32x2_max{}; - bool uses_64_bit_atomics{}; - bool uses_shared_memory_u32x2{}; + bool uses_int64_bit_atomics{}; IR::Type used_constant_buffer_types{}; + IR::Type used_storage_buffer_types{}; u32 constant_buffer_mask{}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 911dfed44..87cfe6312 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -44,6 +44,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, + VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -313,6 +314,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, host_query_reset); + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR, + .pNext = nullptr, + .variablePointersStorageBuffer = VK_TRUE, + .variablePointers = VK_TRUE, + }; + SetNext(next, variable_pointers); + VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT, .pNext = nullptr, @@ -399,6 +408,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; + if (ext_shader_atomic_int64) { + atomic_int64 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR, + .pNext = nullptr, + .shaderBufferInt64Atomics = VK_TRUE, + .shaderSharedInt64Atomics = VK_TRUE, + }; + SetNext(next, atomic_int64); + } + VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_layout; if (khr_workgroup_memory_explicit_layout) { workgroup_layout = { @@ -624,9 +644,13 @@ void Device::CheckSuitability(bool requires_swapchain) const { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; demote.pNext = nullptr; + VkPhysicalDeviceVariablePointerFeaturesKHR variable_pointers{}; + variable_pointers.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES_KHR; + variable_pointers.pNext = &demote; + VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - robustness2.pNext = &demote; + robustness2.pNext = &variable_pointers; VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -654,6 +678,9 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), + std::make_pair(variable_pointers.variablePointers, "variablePointers"), + std::make_pair(variable_pointers.variablePointersStorageBuffer, + "variablePointersStorageBuffer"), std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), -- cgit v1.2.3 From a83579b50a167ab9483e5058fd1c748018ef6d7c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 13 Apr 2021 16:56:22 -0300 Subject: shader: Implement early Z tests --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 3 +++ src/shader_recompiler/profile.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 3 files changed, 5 insertions(+) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 61a2018d7..7ad00c434 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -178,6 +178,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { if (program.info.stores_frag_depth) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } + if (ctx.profile.force_early_z) { + ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); + } break; default: throw NotImplementedException("Stage {}", program.stage); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 06f1f59bd..919bec4e2 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -53,6 +53,7 @@ struct Profile { std::array generic_input_types{}; bool convert_depth_mode{}; + bool force_early_z{}; InputTopology input_topology{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f49add208..8a59a2611 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -934,6 +934,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, profile.input_topology = Shader::InputTopology::TrianglesAdjacency; break; } + profile.force_early_z = key.state.early_z != 0; return profile; } -- cgit v1.2.3 From b126987c59964d81ae3705ad7ad6c0ace8714e19 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 01:04:59 -0300 Subject: shader: Implement transform feedbacks and define file format --- .../backend/spirv/emit_context.cpp | 54 ++++++++-- src/shader_recompiler/backend/spirv/emit_context.h | 8 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 3 + .../backend/spirv/emit_spirv_context_get_set.cpp | 19 +++- .../backend/spirv/emit_spirv_special.cpp | 29 ++++- src/shader_recompiler/frontend/ir/attribute.cpp | 7 ++ src/shader_recompiler/frontend/ir/attribute.h | 2 + src/shader_recompiler/profile.h | 10 ++ .../renderer_vulkan/fixed_pipeline_state.cpp | 19 +++- .../renderer_vulkan/fixed_pipeline_state.h | 26 ++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 118 ++++++++++++++++++++- 11 files changed, 272 insertions(+), 23 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index df53e58a8..74c42233d 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -135,6 +135,45 @@ Id DefineOutput(EmitContext& ctx, Id type, std::optional builtin = return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); } +void DefineGenericOutput(EmitContext& ctx, size_t index) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_attr_index{static_cast(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + if (!ctx.profile.xfb_varyings.empty()) { + xfb_varying = &ctx.profile.xfb_varyings[base_attr_index + element]; + xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; + + const Id id{DefineOutput(ctx, ctx.F32[num_components])}; + ctx.Decorate(id, spv::Decoration::Location, static_cast(index)); + if (element > 0) { + ctx.Decorate(id, spv::Decoration::Component, element); + } + if (xfb_varying) { + ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer); + ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride); + ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset); + } + if (num_components < 4 || element > 0) { + ctx.Name(id, fmt::format("out_attr{}", index)); + } else { + const std::string_view subswizzle{swizzle.substr(element, num_components)}; + ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle)); + } + const GenericElementInfo info{ + .id = id, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(ctx.output_generics[index].begin(), num_components, info); + element += num_components; + } +} + Id GetAttributeType(EmitContext& ctx, AttributeType type) { switch (type) { case AttributeType::Float: @@ -663,12 +702,15 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpReturn(); ++label_index; } - for (size_t i = 0; i < info.stores_generics.size(); i++) { + for (size_t i = 0; i < info.stores_generics.size(); ++i) { if (!info.stores_generics[i]) { continue; } + if (output_generics[i][0].num_components != 4) { + throw NotImplementedException("Physical stores and transform feedbacks"); + } AddLabel(labels[label_index]); - const Id generic_id{output_generics.at(i)}; + const Id generic_id{output_generics[i][0].id}; const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; OpStore(pointer, store_value); OpReturn(); @@ -1015,11 +1057,9 @@ void EmitContext::DefineOutputs(const Info& info) { } viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex); } - for (size_t i = 0; i < info.stores_generics.size(); ++i) { - if (info.stores_generics[i]) { - output_generics[i] = DefineOutput(*this, F32[4]); - Decorate(output_generics[i], spv::Decoration::Location, static_cast(i)); - Name(output_generics[i], fmt::format("out_attr{}", i)); + for (size_t index = 0; index < info.stores_generics.size(); ++index) { + if (info.stores_generics[index]) { + DefineGenericOutput(*this, index); } } if (stage == Stage::Fragment) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index cade1fa0d..b27e5540c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -79,6 +79,12 @@ struct StorageDefinitions { Id U32x4{}; }; +struct GenericElementInfo { + Id id{}; + u32 first_element{}; + u32 num_components{}; +}; + class EmitContext final : public Sirit::Module { public: explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); @@ -189,7 +195,7 @@ public: Id output_point_size{}; Id output_position{}; - std::array output_generics{}; + std::array, 32> output_generics{}; std::array frag_color{}; Id frag_depth{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 7ad00c434..444ba276f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -288,6 +288,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_typeless_image_writes) { ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); } + if (!ctx.profile.xfb_varyings.empty()) { + ctx.AddCapability(spv::Capability::TransformFeedback); + } // TODO: Track this usage ctx.AddCapability(spv::Capability::ImageGatherExtended); ctx.AddCapability(spv::Capability::ImageQuery); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index a91b4c212..f9c151a5c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -40,11 +40,17 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... } std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { - const u32 element{static_cast(attr) % 4}; - const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id()); + const u32 element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; + if (info.num_components == 1) { + return info.id; + } else { + const u32 index_element{element - info.first_element}; + const Id index_id{ctx.Constant(ctx.U32[1], index_element)}; + return ctx.OpAccessChain(ctx.output_f32, info.id, index_id); + } } switch (attr) { case IR::Attribute::PointSize: @@ -52,8 +58,11 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionX: case IR::Attribute::PositionY: case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id()); + case IR::Attribute::PositionW: { + const u32 element{static_cast(attr) % 4}; + const Id element_id{ctx.Constant(ctx.U32[1], element)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id); + } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: case IR::Attribute::ClipDistance2: diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index fee740c08..7af29e4dd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -22,6 +22,21 @@ void SetFixedPipelinePointSize(EmitContext& ctx) { ctx.OpStore(ctx.output_point_size, ctx.Constant(ctx.F32[1], point_size)); } } + +Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one, + Id default_vector) { + switch (num_components) { + case 1: + return element == 3 ? one : zero; + case 2: + return ctx.ConstantComposite(ctx.F32[2], zero, element + 1 == 3 ? one : zero); + case 3: + return ctx.ConstantComposite(ctx.F32[3], zero, zero, element + 2 == 3 ? one : zero); + case 4: + return default_vector; + } + throw InvalidArgument("Bad element"); +} } // Anonymous namespace void EmitPrologue(EmitContext& ctx) { @@ -30,9 +45,17 @@ void EmitPrologue(EmitContext& ctx) { const Id one{ctx.Constant(ctx.F32[1], 1.0f)}; const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)}; ctx.OpStore(ctx.output_position, default_vector); - for (const Id generic_id : ctx.output_generics) { - if (Sirit::ValidId(generic_id)) { - ctx.OpStore(generic_id, default_vector); + for (const auto& info : ctx.output_generics) { + if (info[0].num_components == 0) { + continue; + } + u32 element{0}; + while (element < 4) { + const auto& element_info{info[element]}; + const u32 num{element_info.num_components}; + const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)}; + ctx.OpStore(element_info.id, value); + element += num; } } } diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp index 7993e5c43..4d0b8b8e5 100644 --- a/src/shader_recompiler/frontend/ir/attribute.cpp +++ b/src/shader_recompiler/frontend/ir/attribute.cpp @@ -20,6 +20,13 @@ u32 GenericAttributeIndex(Attribute attribute) { return (static_cast(attribute) - static_cast(Attribute::Generic0X)) / 4u; } +u32 GenericAttributeElement(Attribute attribute) { + if (!IsGeneric(attribute)) { + throw InvalidArgument("Attribute is not generic {}", attribute); + } + return static_cast(attribute) % 4; +} + std::string NameOf(Attribute attribute) { switch (attribute) { case Attribute::PrimitiveId: diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 34ec7e0cd..8bf2ddf30 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -226,6 +226,8 @@ enum class Attribute : u64 { [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); +[[nodiscard]] u32 GenericAttributeElement(Attribute attribute); + [[nodiscard]] std::string NameOf(Attribute attribute); } // namespace Shader::IR diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 919bec4e2..5ecae71b9 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include "common/common_types.h" @@ -26,6 +27,13 @@ enum class InputTopology { TrianglesAdjacency, }; +struct TransformFeedbackVarying { + u32 buffer{}; + u32 stride{}; + u32 offset{}; + u32 components{}; +}; + struct Profile { u32 supported_spirv{0x00010000}; @@ -58,6 +66,8 @@ struct Profile { InputTopology input_topology{}; std::optional fixed_state_point_size; + + std::vector xfb_varyings; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d8f683907..6a3baf837 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -52,6 +52,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const u32 topology_index = static_cast(regs.draw.topology.Value()); raw1 = 0; + no_extended_dynamic_state.Assign(has_extended_dynamic_state ? 0 : 1); + xfb_enabled.Assign(regs.tfb_enabled != 0); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); @@ -113,10 +115,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast(viewport.swizzle.raw); }); } - if (!has_extended_dynamic_state) { - no_extended_dynamic_state.Assign(1); + if (no_extended_dynamic_state != 0) { dynamic_state.Refresh(regs); } + if (xfb_enabled != 0) { + xfb_state.Refresh(regs); + } } void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { @@ -158,6 +162,17 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t enable.Assign(1); } +void FixedPipelineState::TransformFeedbackState::Refresh(const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, layouts.begin(), [](const auto& layout) { + return Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + varyings = regs.tfb_varying_locs; +} + void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 348f1d6ce..5568c4f72 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -130,6 +130,18 @@ struct FixedPipelineState { } }; + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + + void Refresh(const Maxwell& regs); + }; + struct DynamicState { union { u32 raw1; @@ -168,6 +180,7 @@ struct FixedPipelineState { union { u32 raw1; BitField<0, 1, u32> no_extended_dynamic_state; + BitField<1, 1, u32> xfb_enabled; BitField<2, 1, u32> primitive_restart_enable; BitField<3, 1, u32> depth_bias_enable; BitField<4, 1, u32> depth_clamp_disabled; @@ -199,6 +212,7 @@ struct FixedPipelineState { std::array attachments; std::array viewport_swizzles; DynamicState dynamic_state; + TransformFeedbackState xfb_state; void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); @@ -211,8 +225,16 @@ struct FixedPipelineState { } size_t Size() const noexcept { - const size_t total_size = sizeof *this; - return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); + if (xfb_enabled != 0) { + // When transform feedback is enabled, use the whole struct + return sizeof(*this); + } else if (no_extended_dynamic_state != 0) { + // Dynamic state is enabled, we can enable more + return offsetof(FixedPipelineState, xfb_state); + } else { + // No XFB, extended dynamic state enabled + return offsetof(FixedPipelineState, dynamic_state); + } } }; static_assert(std::has_unique_object_representations_v); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8a59a2611..de52d0f30 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -248,6 +248,10 @@ namespace { using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; +// TODO: Move this to a separate file +constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; +constexpr u32 CACHE_VERSION{1}; + class GraphicsEnvironment final : public GenericEnvironment { public: explicit GraphicsEnvironment() = default; @@ -379,13 +383,14 @@ void SerializePipeline(const Key& key, const Envs& envs, const std::string& file try { std::ofstream file; file.exceptions(std::ifstream::failbit); - Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::app); + Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app); if (!file.is_open()) { LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); return; } if (file.tellp() == 0) { - // Write header... + file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) + .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); } const std::span key_span(reinterpret_cast(&key), sizeof(key)); SerializePipeline(key_span, MakeSpan(envs), file); @@ -520,8 +525,27 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading file.exceptions(std::ifstream::failbit); const auto end{file.tellg()}; file.seekg(0, std::ios::beg); - // Read header... + std::array magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast(&cache_version), sizeof(cache_version)); + if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + file.close(); + if (Common::FS::Delete(pipeline_cache_filename)) { + if (magic_number != MAGIC_NUMBER) { + LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file"); + } + if (cache_version != CACHE_VERSION) { + LOG_INFO(Render_Vulkan, "Deleting old pipeline cache"); + } + } else { + LOG_ERROR(Render_Vulkan, + "Invalid pipeline cache file and failed to delete it in \"{}\"", + pipeline_cache_filename); + } + return; + } while (file.tellg() != end) { if (stop_loading) { return; @@ -879,6 +903,88 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } +static std::vector MakeTransformFeedbackVaryings( + const GraphicsPipelineCacheKey& key) { + static constexpr std::array VECTORS{ + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] + }; + std::vector xfb(256); + for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { + const auto& locations = key.state.xfb_state.varyings[buffer]; + const auto& layout = key.state.xfb_state.layouts[buffer]; + const u32 varying_count = layout.varying_count; + u32 highest = 0; + for (u32 offset = 0; offset < varying_count; ++offset) { + const u32 base_offset = offset; + const u8 location = locations[offset]; + + Shader::TransformFeedbackVarying varying; + varying.buffer = layout.stream; + varying.stride = layout.stride; + varying.offset = offset * 4; + varying.components = 1; + + if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + xfb[location] = varying; + highest = std::max(highest, (base_offset + varying.components) * 4); + } + UNIMPLEMENTED_IF(highest != layout.stride); + } + return xfb; +} + Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::Profile profile{base_profile}; @@ -893,6 +999,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, if (key.state.topology == Maxwell::PrimitiveTopology::Points) { profile.fixed_state_point_size = point_size; } + if (key.state.xfb_enabled != 0) { + profile.xfb_varyings = MakeTransformFeedbackVaryings(key); + } profile.convert_depth_mode = gl_ndc; } std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), @@ -902,6 +1011,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, if (program.output_topology == Shader::OutputTopology::PointList) { profile.fixed_state_point_size = point_size; } + if (key.state.xfb_enabled != 0) { + profile.xfb_varyings = MakeTransformFeedbackVaryings(key); + } profile.convert_depth_mode = gl_ndc; break; default: -- cgit v1.2.3 From 6c512f4bffde6bd8e4dbc74ed27cc84cd7fffadb Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 14 Apr 2021 00:32:18 -0400 Subject: spirv: Implement alpha test --- .../backend/spirv/emit_spirv_special.cpp | 45 ++++++++++++++++++++++ src/shader_recompiler/profile.h | 15 +++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 36 +++++++++++++++++ 3 files changed, 95 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 7af29e4dd..8bb94f546 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -37,6 +37,48 @@ Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id } throw InvalidArgument("Bad element"); } + +Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1, Id operand_2) { + switch (comparison) { + case CompareFunction::Never: + return ctx.false_value; + case CompareFunction::Less: + return ctx.OpFOrdLessThan(ctx.U1, operand_1, operand_2); + case CompareFunction::Equal: + return ctx.OpFOrdEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::LessThanEqual: + return ctx.OpFOrdLessThanEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::Greater: + return ctx.OpFOrdGreaterThan(ctx.U1, operand_1, operand_2); + case CompareFunction::NotEqual: + return ctx.OpFOrdNotEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::GreaterThanEqual: + return ctx.OpFOrdGreaterThanEqual(ctx.U1, operand_1, operand_2); + case CompareFunction::Always: + return ctx.true_value; + } + throw InvalidArgument("Comparison function {}", comparison); +} + +void AlphaTest(EmitContext& ctx) { + const auto comparison{*ctx.profile.alpha_test_func}; + if (comparison == CompareFunction::Always) { + return; + } + const Id type{ctx.F32[1]}; + const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])}; + const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)}; + + const Id true_label{ctx.OpLabel()}; + const Id discard_label{ctx.OpLabel()}; + const Id alpha_reference{ctx.Constant(ctx.F32[1], ctx.profile.alpha_test_reference)}; + const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; + + ctx.OpBranchConditional(condition, true_label, discard_label); + ctx.AddLabel(discard_label); + ctx.OpKill(); + ctx.AddLabel(true_label); +} } // Anonymous namespace void EmitPrologue(EmitContext& ctx) { @@ -68,6 +110,9 @@ void EmitEpilogue(EmitContext& ctx) { if (ctx.stage == Stage::VertexB && ctx.profile.convert_depth_mode) { ConvertDepthMode(ctx); } + if (ctx.stage == Stage::Fragment) { + AlphaTest(ctx); + } } void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 5ecae71b9..c26017d75 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -5,8 +5,8 @@ #pragma once #include -#include #include +#include #include "common/common_types.h" @@ -27,6 +27,17 @@ enum class InputTopology { TrianglesAdjacency, }; +enum class CompareFunction { + Never, + Less, + Equal, + LessThanEqual, + Greater, + NotEqual, + GreaterThanEqual, + Always, +}; + struct TransformFeedbackVarying { u32 buffer{}; u32 stride{}; @@ -66,6 +77,8 @@ struct Profile { InputTopology input_topology{}; std::optional fixed_state_point_size; + std::optional alpha_test_func; + float alpha_test_reference{}; std::vector xfb_varyings; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index de52d0f30..80f196d0e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -492,6 +492,37 @@ private: u32 read_lowest{}; u32 read_highest{}; }; + +Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { + switch (comparison) { + case Maxwell::ComparisonOp::Never: + case Maxwell::ComparisonOp::NeverOld: + return Shader::CompareFunction::Never; + case Maxwell::ComparisonOp::Less: + case Maxwell::ComparisonOp::LessOld: + return Shader::CompareFunction::Less; + case Maxwell::ComparisonOp::Equal: + case Maxwell::ComparisonOp::EqualOld: + return Shader::CompareFunction::Equal; + case Maxwell::ComparisonOp::LessEqual: + case Maxwell::ComparisonOp::LessEqualOld: + return Shader::CompareFunction::LessThanEqual; + case Maxwell::ComparisonOp::Greater: + case Maxwell::ComparisonOp::GreaterOld: + return Shader::CompareFunction::Greater; + case Maxwell::ComparisonOp::NotEqual: + case Maxwell::ComparisonOp::NotEqualOld: + return Shader::CompareFunction::NotEqual; + case Maxwell::ComparisonOp::GreaterEqual: + case Maxwell::ComparisonOp::GreaterEqualOld: + return Shader::CompareFunction::GreaterThanEqual; + case Maxwell::ComparisonOp::Always: + case Maxwell::ComparisonOp::AlwaysOld: + return Shader::CompareFunction::Always; + } + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); + return {}; +} } // Anonymous namespace void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, @@ -1016,6 +1047,11 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, } profile.convert_depth_mode = gl_ndc; break; + case Shader::Stage::Fragment: + profile.alpha_test_func = MaxwellToCompareFunction( + key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); + profile.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); + break; default: break; } -- cgit v1.2.3 From 416e1b7441d34512fcb0ffed014daf7ca4bb62bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Apr 2021 21:36:36 -0300 Subject: spirv: Implement image buffers --- .../backend/spirv/emit_context.cpp | 70 +++++++++++++++------- src/shader_recompiler/backend/spirv/emit_context.h | 7 +++ .../backend/spirv/emit_spirv_image.cpp | 3 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 19 +++++- src/shader_recompiler/shader_info.h | 10 ++++ src/video_core/buffer_cache/buffer_cache.h | 24 ++++++-- src/video_core/renderer_vulkan/pipeline_helper.h | 2 + .../renderer_vulkan/vk_compute_pipeline.cpp | 25 +++++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 31 ++++++---- 9 files changed, 142 insertions(+), 49 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index f96d5ae37..032cf5e03 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -54,28 +54,30 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { throw InvalidArgument("Invalid texture type {}", desc.type); } +spv::ImageFormat GetImageFormat(ImageFormat format) { + switch (format) { + case ImageFormat::Typeless: + return spv::ImageFormat::Unknown; + case ImageFormat::R8_UINT: + return spv::ImageFormat::R8ui; + case ImageFormat::R8_SINT: + return spv::ImageFormat::R8i; + case ImageFormat::R16_UINT: + return spv::ImageFormat::R16ui; + case ImageFormat::R16_SINT: + return spv::ImageFormat::R16i; + case ImageFormat::R32_UINT: + return spv::ImageFormat::R32ui; + case ImageFormat::R32G32_UINT: + return spv::ImageFormat::Rg32ui; + case ImageFormat::R32G32B32A32_UINT: + return spv::ImageFormat::Rgba32ui; + } + throw InvalidArgument("Invalid image format {}", format); +} + Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { - const spv::ImageFormat format{[&] { - switch (desc.format) { - case ImageFormat::Typeless: - return spv::ImageFormat::Unknown; - case ImageFormat::R8_UINT: - return spv::ImageFormat::R8ui; - case ImageFormat::R8_SINT: - return spv::ImageFormat::R8i; - case ImageFormat::R16_UINT: - return spv::ImageFormat::R16ui; - case ImageFormat::R16_SINT: - return spv::ImageFormat::R16i; - case ImageFormat::R32_UINT: - return spv::ImageFormat::R32ui; - case ImageFormat::R32G32_UINT: - return spv::ImageFormat::Rg32ui; - case ImageFormat::R32G32B32A32_UINT: - return spv::ImageFormat::Rgba32ui; - } - throw InvalidArgument("Invalid image format {}", desc.format); - }()}; + const spv::ImageFormat format{GetImageFormat(desc.format)}; const Id type{ctx.U32[1]}; switch (desc.type) { case TextureType::Color1D: @@ -388,6 +390,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); DefineTextureBuffers(program.info, binding); + DefineImageBuffers(program.info, binding); DefineTextures(program.info, binding); DefineImages(program.info, binding); DefineAttributeMemAccess(program.info); @@ -883,6 +886,31 @@ void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { } } +void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { + image_buffers.reserve(info.image_buffer_descriptors.size()); + for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of image buffers"); + } + const spv::ImageFormat format{GetImageFormat(desc.format)}; + const Id image_type{TypeImage(U32[4], spv::Dim::Buffer, false, false, false, 2, format)}; + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("imgbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + const ImageBufferDefinition def{ + .id = id, + .image_type = image_type, + }; + image_buffers.insert(image_buffers.end(), desc.count, def); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + binding += desc.count; + } +} + void EmitContext::DefineTextures(const Info& info, u32& binding) { textures.reserve(info.texture_descriptors.size()); for (const TextureDescriptor& desc : info.texture_descriptors) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 1f0d8be77..0da14d5f8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -35,6 +35,11 @@ struct TextureDefinition { Id image_type; }; +struct ImageBufferDefinition { + Id id; + Id image_type; +}; + struct ImageDefinition { Id id; Id image_type; @@ -136,6 +141,7 @@ public: std::array cbufs{}; std::array ssbos{}; std::vector texture_buffers; + std::vector image_buffers; std::vector textures; std::vector images; @@ -213,6 +219,7 @@ private: void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); void DefineTextureBuffers(const Info& info, u32& binding); + void DefineImageBuffers(const Info& info, u32& binding); void DefineTextures(const Info& info, u32& binding); void DefineImages(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 17266ce77..c8d1d25b1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -149,7 +149,8 @@ Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { throw NotImplementedException("Indirect image indexing"); } if (info.type == TextureType::Buffer) { - throw NotImplementedException("Image buffer"); + const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())}; + return ctx.OpLoad(def.image_type, def.id); } else { const ImageDefinition def{ctx.images.at(index.U32())}; return ctx.OpLoad(def.image_type, def.id); diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index a7b1fcfad..e1d5a2ce1 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -158,9 +158,11 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { class Descriptors { public: explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, + ImageBufferDescriptors& image_buffer_descriptors_, TextureDescriptors& texture_descriptors_, ImageDescriptors& image_descriptors_) : texture_buffer_descriptors{texture_buffer_descriptors_}, + image_buffer_descriptors{image_buffer_descriptors_}, texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {} u32 Add(const TextureBufferDescriptor& desc) { @@ -170,6 +172,13 @@ public: }); } + u32 Add(const ImageBufferDescriptor& desc) { + return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { + return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset; + }); + } + u32 Add(const TextureDescriptor& desc) { return Add(texture_descriptors, desc, [&desc](const auto& existing) { return desc.cbuf_index == existing.cbuf_index && @@ -200,6 +209,7 @@ private: } TextureBufferDescriptors& texture_buffer_descriptors; + ImageBufferDescriptors& image_buffer_descriptors; TextureDescriptors& texture_descriptors; ImageDescriptors& image_descriptors; }; @@ -224,6 +234,7 @@ void TexturePass(Environment& env, IR::Program& program) { }); Descriptors descriptors{ program.info.texture_buffer_descriptors, + program.info.image_buffer_descriptors, program.info.texture_descriptors, program.info.image_descriptors, }; @@ -261,7 +272,13 @@ void TexturePass(Environment& env, IR::Program& program) { case IR::Opcode::ImageWrite: { const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; if (flags.type == TextureType::Buffer) { - throw NotImplementedException("Image buffer"); + index = descriptors.Add(ImageBufferDescriptor{ + .format = flags.image_format, + .is_written = is_written, + .cbuf_index = cbuf.index, + .cbuf_offset = cbuf.offset, + .count = 1, + }); } else { index = descriptors.Add(ImageDescriptor{ .type = flags.type, diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index e078b0fa1..336c6131a 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -67,6 +67,15 @@ struct TextureBufferDescriptor { }; using TextureBufferDescriptors = boost::container::small_vector; +struct ImageBufferDescriptor { + ImageFormat format; + bool is_written; + u32 cbuf_index; + u32 cbuf_offset; + u32 count; +}; +using ImageBufferDescriptors = boost::container::small_vector; + struct TextureDescriptor { TextureType type; bool is_depth; @@ -153,6 +162,7 @@ struct Info { constant_buffer_descriptors; boost::container::static_vector storage_buffers_descriptors; TextureBufferDescriptors texture_buffer_descriptors; + ImageBufferDescriptors image_buffer_descriptors; TextureDescriptors texture_descriptors; ImageDescriptors image_descriptors; }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6701aab82..29746f61d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -154,7 +154,7 @@ public: void UnbindGraphicsTextureBuffers(size_t stage); void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format); + PixelFormat format, bool is_written); void UnbindComputeStorageBuffers(); @@ -163,8 +163,8 @@ public: void UnbindComputeTextureBuffers(); - void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format); + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, + bool is_written); void FlushCachedWrites(); @@ -393,7 +393,9 @@ private: u32 written_compute_storage_buffers = 0; std::array enabled_texture_buffers{}; + std::array written_texture_buffers{}; u32 enabled_compute_texture_buffers = 0; + u32 written_compute_texture_buffers = 0; std::array fast_bound_uniform_buffers{}; @@ -700,12 +702,14 @@ void BufferCache

::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, template void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { enabled_texture_buffers[stage] = 0; + written_texture_buffers[stage] = 0; } template void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, - u32 size, PixelFormat format) { + u32 size, PixelFormat format, bool is_written) { enabled_texture_buffers[stage] |= 1U << tbo_index; + written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -732,12 +736,14 @@ void BufferCache

::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, template void BufferCache

::UnbindComputeTextureBuffers() { enabled_compute_texture_buffers = 0; + written_compute_texture_buffers = 0; } template void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format) { + PixelFormat format, bool is_written) { enabled_compute_texture_buffers |= 1U << tbo_index; + written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -1274,6 +1280,10 @@ void BufferCache

::UpdateTextureBuffers(size_t stage) { ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { Binding& binding = texture_buffers[stage][index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark buffer as written if needed + if (((written_texture_buffers[stage] >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } }); } @@ -1343,6 +1353,10 @@ void BufferCache

::UpdateComputeTextureBuffers() { ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { Binding& binding = compute_texture_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark as written if needed + if (((written_compute_texture_buffers >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } }); } diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index e2167dc4b..aaf9a735e 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -88,6 +88,7 @@ public: Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); } @@ -126,6 +127,7 @@ inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& sam const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { image_view_ids += info.texture_buffer_descriptors.size(); + image_view_ids += info.image_buffer_descriptors.size(); for (const auto& desc : info.texture_descriptors) { const VkSampler sampler{*(samplers++)}; ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3d690f335..3c907ec5a 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -97,10 +97,12 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_image{[&](const auto& desc) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_image); + std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -108,24 +110,29 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.image_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } + std::ranges::for_each(info.image_descriptors, add_image); + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); buffer_cache.UnbindComputeTextureBuffers(); ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_buffer{[&](const auto& desc) { ASSERT(desc.count == 1); + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), - image_view.format); + image_view.format, is_written); ++texture_buffer_ids; ++index; - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 23c01f24e..84720a6f9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -175,10 +175,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_image{[&](const auto& desc) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); - } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_image); + std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -186,28 +188,33 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.image_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } + std::ranges::for_each(info.image_descriptors, add_image); } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); ImageId* texture_buffer_index{image_view_ids.data()}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader::Info& info{stage_infos[stage]}; - buffer_cache.UnbindGraphicsTextureBuffers(stage); size_t index{}; - for (const auto& desc : info.texture_buffer_descriptors) { + const auto add_buffer{[&](const auto& desc) { ASSERT(desc.count == 1); - ImageView& image_view = texture_cache.GetImageView(*texture_buffer_index); + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format); + image_view.BufferSize(), image_view.format, + is_written); ++index; ++texture_buffer_index; - } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); texture_buffer_index += info.texture_descriptors.size(); + texture_buffer_index += info.image_descriptors.size(); } buffer_cache.UpdateGraphicsBuffers(is_indexed); -- cgit v1.2.3 From 7ae3ea6beefae76e6671436114863fce1baacd9e Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 15 Apr 2021 19:01:45 -0400 Subject: vk_pipeline_cache: Silence GCC warnings Silences `-Werror=missing-field-initializers` due to missing initializers. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 80f196d0e..ee22255bf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -696,6 +696,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .generic_input_types{}, .fixed_state_point_size{}, + .alpha_test_func{}, + .xfb_varyings{}, }; } -- cgit v1.2.3 From 183855e396cc6918d36fbf3e38ea426e934b4e3e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 15 Apr 2021 22:46:11 -0300 Subject: shader: Implement tessellation shaders, polygon mode and invocation id --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 147 ++++++++++++++------ src/shader_recompiler/backend/spirv/emit_context.h | 10 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 39 ++++++ src/shader_recompiler/backend/spirv/emit_spirv.h | 3 + .../backend/spirv/emit_spirv_context_get_set.cpp | 88 ++++++++++-- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 12 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 4 + .../frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + src/shader_recompiler/frontend/ir/patch.cpp | 28 ++++ src/shader_recompiler/frontend/ir/patch.h | 149 +++++++++++++++++++++ src/shader_recompiler/frontend/ir/type.h | 41 +++--- src/shader_recompiler/frontend/ir/value.cpp | 9 ++ src/shader_recompiler/frontend/ir/value.h | 4 + src/shader_recompiler/frontend/maxwell/program.cpp | 5 + .../translate/impl/load_store_attribute.cpp | 33 +++-- .../translate/impl/move_special_register.cpp | 2 + .../ir_opt/collect_shader_info_pass.cpp | 41 ++++++ src/shader_recompiler/profile.h | 16 +++ src/shader_recompiler/shader_info.h | 5 + src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 13 ++ src/video_core/renderer_vulkan/maxwell_to_vk.h | 2 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 30 +++++ .../renderer_vulkan/vk_staging_buffer_pool.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 3 +- 28 files changed, 605 insertions(+), 91 deletions(-) create mode 100644 src/shader_recompiler/frontend/ir/patch.cpp create mode 100644 src/shader_recompiler/frontend/ir/patch.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index bbbfa98a3..7c11d15bf 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -41,6 +41,8 @@ add_library(shader_recompiler STATIC frontend/ir/opcodes.cpp frontend/ir/opcodes.h frontend/ir/opcodes.inc + frontend/ir/patch.cpp + frontend/ir/patch.h frontend/ir/post_order.cpp frontend/ir/post_order.h frontend/ir/pred.h diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 032cf5e03..067f61613 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -125,19 +125,36 @@ u32 NumVertices(InputTopology input_topology) { throw InvalidArgument("Invalid input topology {}", input_topology); } -Id DefineInput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { - if (ctx.stage == Stage::Geometry) { - const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; - type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); +Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, + std::optional builtin = std::nullopt) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + if (per_invocation) { + type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], 32u)); + } + break; + case Stage::Geometry: + if (per_invocation) { + const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; + type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); + } + break; + default: + break; } return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); } -Id DefineOutput(EmitContext& ctx, Id type, std::optional builtin = std::nullopt) { +Id DefineOutput(EmitContext& ctx, Id type, std::optional invocations, + std::optional builtin = std::nullopt) { + if (invocations && ctx.stage == Stage::TessellationControl) { + type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], *invocations)); + } return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); } -void DefineGenericOutput(EmitContext& ctx, size_t index) { +void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional invocations) { static constexpr std::string_view swizzle{"xyzw"}; const size_t base_attr_index{static_cast(IR::Attribute::Generic0X) + index * 4}; u32 element{0}; @@ -150,7 +167,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index) { } const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; - const Id id{DefineOutput(ctx, ctx.F32[num_components])}; + const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)}; ctx.Decorate(id, spv::Decoration::Location, static_cast(index)); if (element > 0) { ctx.Decorate(id, spv::Decoration::Component, element); @@ -161,10 +178,10 @@ void DefineGenericOutput(EmitContext& ctx, size_t index) { ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset); } if (num_components < 4 || element > 0) { - ctx.Name(id, fmt::format("out_attr{}", index)); - } else { const std::string_view subswizzle{swizzle.substr(element, num_components)}; ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle)); + } else { + ctx.Name(id, fmt::format("out_attr{}", index)); } const GenericElementInfo info{ .id = id, @@ -383,7 +400,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); - DefineInterfaces(program.info); + DefineInterfaces(program); DefineLocalMemory(program); DefineSharedMemory(program); DefineSharedMemoryFunctions(program); @@ -472,9 +489,9 @@ void EmitContext::DefineCommonConstants() { f32_zero_value = Constant(F32[1], 0.0f); } -void EmitContext::DefineInterfaces(const Info& info) { - DefineInputs(info); - DefineOutputs(info); +void EmitContext::DefineInterfaces(const IR::Program& program) { + DefineInputs(program.info); + DefineOutputs(program); } void EmitContext::DefineLocalMemory(const IR::Program& program) { @@ -972,26 +989,29 @@ void EmitContext::DefineLabels(IR::Program& program) { void EmitContext::DefineInputs(const Info& info) { if (info.uses_workgroup_id) { - workgroup_id = DefineInput(*this, U32[3], spv::BuiltIn::WorkgroupId); + workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); } if (info.uses_local_invocation_id) { - local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); + local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId); + } + if (info.uses_invocation_id) { + invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId); } if (info.uses_is_helper_invocation) { - is_helper_invocation = DefineInput(*this, U1, spv::BuiltIn::HelperInvocation); + is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); } if (info.uses_subgroup_mask) { - subgroup_mask_eq = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupEqMaskKHR); - subgroup_mask_lt = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupLtMaskKHR); - subgroup_mask_le = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupLeMaskKHR); - subgroup_mask_gt = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupGtMaskKHR); - subgroup_mask_ge = DefineInput(*this, U32[4], spv::BuiltIn::SubgroupGeMaskKHR); + subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR); + subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR); + subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR); + subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); + subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); } if (info.uses_subgroup_invocation_id || (profile.warp_size_potentially_larger_than_guest && (info.uses_subgroup_vote || info.uses_subgroup_mask))) { subgroup_local_invocation_id = - DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); + DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId); } if (info.uses_fswzadd) { const Id f32_one{Constant(F32[1], 1.0f)}; @@ -1004,29 +1024,32 @@ void EmitContext::DefineInputs(const Info& info) { if (info.loads_position) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; - input_position = DefineInput(*this, F32[4], built_in); + input_position = DefineInput(*this, F32[4], true, built_in); } if (info.loads_instance_id) { if (profile.support_vertex_instance_id) { - instance_id = DefineInput(*this, U32[1], spv::BuiltIn::InstanceId); + instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); } else { - instance_index = DefineInput(*this, U32[1], spv::BuiltIn::InstanceIndex); - base_instance = DefineInput(*this, U32[1], spv::BuiltIn::BaseInstance); + instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); + base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } } if (info.loads_vertex_id) { if (profile.support_vertex_instance_id) { - vertex_id = DefineInput(*this, U32[1], spv::BuiltIn::VertexId); + vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); } else { - vertex_index = DefineInput(*this, U32[1], spv::BuiltIn::VertexIndex); - base_vertex = DefineInput(*this, U32[1], spv::BuiltIn::BaseVertex); + vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); + base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } } if (info.loads_front_face) { - front_face = DefineInput(*this, U1, spv::BuiltIn::FrontFacing); + front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); } if (info.loads_point_coord) { - point_coord = DefineInput(*this, F32[2], spv::BuiltIn::PointCoord); + point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); + } + if (info.loads_tess_coord) { + tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } for (size_t index = 0; index < info.input_generics.size(); ++index) { const InputVarying generic{info.input_generics[index]}; @@ -1038,7 +1061,7 @@ void EmitContext::DefineInputs(const Info& info) { continue; } const Id type{GetAttributeType(*this, input_type)}; - const Id id{DefineInput(*this, type)}; + const Id id{DefineInput(*this, type, true)}; Decorate(id, spv::Decoration::Location, static_cast(index)); Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; @@ -1059,58 +1082,98 @@ void EmitContext::DefineInputs(const Info& info) { break; } } + if (stage == Stage::TessellationEval) { + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineInput(*this, F32[4], false)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast(index)); + patches[index] = id; + } + } } -void EmitContext::DefineOutputs(const Info& info) { +void EmitContext::DefineOutputs(const IR::Program& program) { + const Info& info{program.info}; + const std::optional invocations{program.invocations}; if (info.stores_position || stage == Stage::VertexB) { - output_position = DefineOutput(*this, F32[4], spv::BuiltIn::Position); + output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } if (info.stores_point_size || profile.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } - output_point_size = DefineOutput(*this, F32[1], spv::BuiltIn::PointSize); + output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); } if (info.stores_clip_distance) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; - clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); + clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } if (info.stores_layer && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing Layer in fragment stage"); } - layer = DefineOutput(*this, U32[1], spv::BuiltIn::Layer); + layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); } if (info.stores_viewport_index && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ViewportIndex in fragment stage"); } - viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex); + viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } for (size_t index = 0; index < info.stores_generics.size(); ++index) { if (info.stores_generics[index]) { - DefineGenericOutput(*this, index); + DefineGenericOutput(*this, index, invocations); } } - if (stage == Stage::Fragment) { + switch (stage) { + case Stage::TessellationControl: + if (info.stores_tess_level_outer) { + const Id type{TypeArray(F32[1], Constant(U32[1], 4))}; + output_tess_level_outer = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter); + Decorate(output_tess_level_outer, spv::Decoration::Patch); + } + if (info.stores_tess_level_inner) { + const Id type{TypeArray(F32[1], Constant(U32[1], 2))}; + output_tess_level_inner = + DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner); + Decorate(output_tess_level_inner, spv::Decoration::Patch); + } + for (size_t index = 0; index < info.uses_patches.size(); ++index) { + if (!info.uses_patches[index]) { + continue; + } + const Id id{DefineOutput(*this, F32[4], std::nullopt)}; + Decorate(id, spv::Decoration::Patch); + Decorate(id, spv::Decoration::Location, static_cast(index)); + patches[index] = id; + } + break; + case Stage::Fragment: for (u32 index = 0; index < 8; ++index) { if (!info.stores_frag_color[index]) { continue; } - frag_color[index] = DefineOutput(*this, F32[4]); + frag_color[index] = DefineOutput(*this, F32[4], std::nullopt); Decorate(frag_color[index], spv::Decoration::Location, index); Name(frag_color[index], fmt::format("frag_color{}", index)); } if (info.stores_frag_depth) { - frag_depth = DefineOutput(*this, F32[1]); + frag_depth = DefineOutput(*this, F32[1], std::nullopt); Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); Name(frag_depth, "frag_depth"); } + break; + default: + break; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 0da14d5f8..ba0a253b3 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -147,6 +147,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; + Id invocation_id{}; Id is_helper_invocation{}; Id subgroup_local_invocation_id{}; Id subgroup_mask_eq{}; @@ -162,6 +163,7 @@ public: Id base_vertex{}; Id front_face{}; Id point_coord{}; + Id tess_coord{}; Id clip_distances{}; Id layer{}; Id viewport_index{}; @@ -204,6 +206,10 @@ public: Id output_position{}; std::array, 32> output_generics{}; + Id output_tess_level_outer{}; + Id output_tess_level_inner{}; + std::array patches{}; + std::array frag_color{}; Id frag_depth{}; @@ -212,7 +218,7 @@ public: private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); - void DefineInterfaces(const Info& info); + void DefineInterfaces(const IR::Program& program); void DefineLocalMemory(const IR::Program& program); void DefineSharedMemory(const IR::Program& program); void DefineSharedMemoryFunctions(const IR::Program& program); @@ -226,7 +232,7 @@ private: void DefineLabels(IR::Program& program); void DefineInputs(const Info& info); - void DefineOutputs(const Info& info); + void DefineOutputs(const IR::Program& program); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3bf4c6a9e..105602ccf 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -45,6 +45,8 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { return arg.Label(); } else if constexpr (std::is_same_v) { return arg.Attribute(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); } else if constexpr (std::is_same_v) { return arg.Reg(); } @@ -120,6 +122,30 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) { return main; } +spv::ExecutionMode ExecutionMode(TessPrimitive primitive) { + switch (primitive) { + case TessPrimitive::Isolines: + return spv::ExecutionMode::Isolines; + case TessPrimitive::Triangles: + return spv::ExecutionMode::Triangles; + case TessPrimitive::Quads: + return spv::ExecutionMode::Quads; + } + throw InvalidArgument("Tessellation primitive {}", primitive); +} + +spv::ExecutionMode ExecutionMode(TessSpacing spacing) { + switch (spacing) { + case TessSpacing::Equal: + return spv::ExecutionMode::SpacingEqual; + case TessSpacing::FractionalOdd: + return spv::ExecutionMode::SpacingFractionalOdd; + case TessSpacing::FractionalEven: + return spv::ExecutionMode::SpacingFractionalEven; + } + throw InvalidArgument("Tessellation spacing {}", spacing); +} + void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; @@ -134,6 +160,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { case Stage::VertexB: execution_model = spv::ExecutionModel::Vertex; break; + case Stage::TessellationControl: + execution_model = spv::ExecutionModel::TessellationControl; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations); + break; + case Stage::TessellationEval: + execution_model = spv::ExecutionModel::TessellationEvaluation; + ctx.AddCapability(spv::Capability::Tessellation); + ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_primitive)); + ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_spacing)); + ctx.AddExecutionMode(main, ctx.profile.tess_clockwise ? spv::ExecutionMode::VertexOrderCw + : spv::ExecutionMode::VertexOrderCcw); + break; case Stage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddCapability(spv::Capability::Geometry); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 55b2edba0..8caf30f1b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -55,6 +55,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); void EmitGetZFlag(EmitContext& ctx); @@ -67,6 +69,7 @@ void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitInvocationId(EmitContext& ctx); Id EmitIsHelperInvocation(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 59c56c5ba..4a1aeece5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -32,13 +32,26 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { template Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { - if (ctx.stage == Stage::Geometry) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: return ctx.OpAccessChain(pointer_type, base, vertex, std::forward(args)...); - } else { + default: return ctx.OpAccessChain(pointer_type, base, std::forward(args)...); } } +template +Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { + if (ctx.stage == Stage::TessellationControl) { + const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)}; + return ctx.OpAccessChain(result_type, base, invocation_id, std::forward(args)...); + } else { + return ctx.OpAccessChain(result_type, base, std::forward(args)...); + } +} + std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; @@ -49,7 +62,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { } else { const u32 index_element{element - info.first_element}; const Id index_id{ctx.Constant(ctx.U32[1], index_element)}; - return ctx.OpAccessChain(ctx.output_f32, info.id, index_id); + return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); } } switch (attr) { @@ -61,7 +74,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionW: { const u32 element{static_cast(attr) % 4}; const Id element_id{ctx.Constant(ctx.U32[1], element)}; - return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id); + return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); } case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: @@ -74,7 +87,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { const u32 base{static_cast(IR::Attribute::ClipDistance0)}; const u32 index{static_cast(attr) - base}; const Id clip_num{ctx.Constant(ctx.U32[1], index)}; - return ctx.OpAccessChain(ctx.output_f32, ctx.clip_distances, clip_num); + return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); } case IR::Attribute::Layer: return ctx.profile.support_viewport_index_layer_non_geometry || @@ -222,11 +235,18 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { ctx.Constant(ctx.U32[1], std::numeric_limits::max()), ctx.u32_zero_value); case IR::Attribute::PointSpriteS: - return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, - ctx.u32_zero_value)); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); case IR::Attribute::PointSpriteT: - return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, - ctx.Constant(ctx.U32[1], 1U))); + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, + ctx.Constant(ctx.U32[1], 1U))); + case IR::Attribute::TessellationEvaluationPointU: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); + case IR::Attribute::TessellationEvaluationPointV: + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, + ctx.Constant(ctx.U32[1], 1U))); + default: throw NotImplementedException("Read attribute {}", attr); } @@ -240,9 +260,12 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_un } Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { - if (ctx.stage == Stage::Geometry) { + switch (ctx.stage) { + case Stage::TessellationControl: + case Stage::TessellationEval: + case Stage::Geometry: return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); - } else { + default: return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); } } @@ -251,6 +274,45 @@ void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unus ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value); } +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Non-generic patch load"); + } + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))}; + const Id pointer{ctx.OpAccessChain(ctx.input_f32, ctx.patches.at(index), element)}; + return ctx.OpLoad(ctx.F32[1], pointer); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { + const Id pointer{[&] { + if (IR::IsGeneric(patch)) { + const u32 index{IR::GenericPatchIndex(patch)}; + const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))}; + return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodBottom: { + const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; + const Id index_id{ctx.Constant(ctx.U32[1], index)}; + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); + } + case IR::Patch::TessellationLodInteriorU: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.u32_zero_value); + case IR::Patch::TessellationLodInteriorV: + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, + ctx.Constant(ctx.U32[1], 1u)); + default: + throw NotImplementedException("Patch {}", patch); + } + }()}; + ctx.OpStore(pointer, value); +} + void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { const Id component_id{ctx.Constant(ctx.U32[1], component)}; const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; @@ -301,6 +363,10 @@ Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } +Id EmitInvocationId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); +} + Id EmitIsHelperInvocation(EmitContext& ctx) { return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index d66eb17a6..b821d9f47 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -331,6 +331,14 @@ void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, c Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); } +F32 IREmitter::GetPatch(Patch patch) { + return Inst(Opcode::GetPatch, patch); +} + +void IREmitter::SetPatch(Patch patch, const F32& value) { + Inst(Opcode::SetPatch, patch, value); +} + void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); } @@ -363,6 +371,10 @@ U32 IREmitter::LocalInvocationIdZ() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; } +U32 IREmitter::InvocationId() { + return Inst(Opcode::InvocationId); +} + U1 IREmitter::IsHelperInvocation() { return Inst(Opcode::IsHelperInvocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index e70359eb1..7f8f1ad42 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -84,6 +84,9 @@ public: [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); + [[nodiscard]] F32 GetPatch(Patch patch); + void SetPatch(Patch patch, const F32& value); + void SetFragColor(u32 index, u32 component, const F32& value); void SetFragDepth(const F32& value); @@ -95,6 +98,7 @@ public: [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); + [[nodiscard]] U32 InvocationId(); [[nodiscard]] U1 IsHelperInvocation(); [[nodiscard]] U32 LaneId(); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 204c55fa8..b2d7573d9 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -73,6 +73,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::EndPrimitive: case Opcode::SetAttribute: case Opcode::SetAttributeIndexed: + case Opcode::SetPatch: case Opcode::SetFragColor: case Opcode::SetFragDepth: case Opcode::WriteGlobalU8: diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp index 7d3e0b2ab..7f04b647b 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.cpp +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp @@ -24,6 +24,7 @@ constexpr Type Label{Type::Label}; constexpr Type Reg{Type::Reg}; constexpr Type Pred{Type::Pred}; constexpr Type Attribute{Type::Attribute}; +constexpr Type Patch{Type::Patch}; constexpr Type U1{Type::U1}; constexpr Type U8{Type::U8}; constexpr Type U16{Type::U16}; diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 7a21fe746..a86542cd8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -48,6 +48,8 @@ OPCODE(GetAttribute, F32, Attr OPCODE(SetAttribute, Void, Attribute, F32, U32, ) OPCODE(GetAttributeIndexed, F32, U32, U32, ) OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) +OPCODE(GetPatch, F32, Patch, ) +OPCODE(SetPatch, Void, Patch, F32, ) OPCODE(SetFragColor, Void, U32, U32, F32, ) OPCODE(SetFragDepth, Void, F32, ) OPCODE(GetZFlag, U1, Void, ) @@ -60,6 +62,7 @@ OPCODE(SetCFlag, Void, U1, OPCODE(SetOFlag, Void, U1, ) OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) +OPCODE(InvocationId, U32, ) OPCODE(IsHelperInvocation, U1, ) // Undefined diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp new file mode 100644 index 000000000..1f770bc48 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.cpp @@ -0,0 +1,28 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/frontend/ir/patch.h" +#include "shader_recompiler/exception.h" + +namespace Shader::IR { + +bool IsGeneric(Patch patch) noexcept { + return patch >= Patch::Component0 && patch <= Patch::Component119; +} + +u32 GenericPatchIndex(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast(patch) - static_cast(Patch::Component0)) / 4; +} + +u32 GenericPatchElement(Patch patch) { + if (!IsGeneric(patch)) { + throw InvalidArgument("Patch {} is not generic", patch); + } + return (static_cast(patch) - static_cast(Patch::Component0)) % 4; +} + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h new file mode 100644 index 000000000..6d66ff0d6 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.h @@ -0,0 +1,149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader::IR { + +enum class Patch : u64 { + TessellationLodLeft, + TessellationLodTop, + TessellationLodRight, + TessellationLodBottom, + TessellationLodInteriorU, + TessellationLodInteriorV, + ComponentPadding0, + ComponentPadding1, + Component0, + Component1, + Component2, + Component3, + Component4, + Component5, + Component6, + Component7, + Component8, + Component9, + Component10, + Component11, + Component12, + Component13, + Component14, + Component15, + Component16, + Component17, + Component18, + Component19, + Component20, + Component21, + Component22, + Component23, + Component24, + Component25, + Component26, + Component27, + Component28, + Component29, + Component30, + Component31, + Component32, + Component33, + Component34, + Component35, + Component36, + Component37, + Component38, + Component39, + Component40, + Component41, + Component42, + Component43, + Component44, + Component45, + Component46, + Component47, + Component48, + Component49, + Component50, + Component51, + Component52, + Component53, + Component54, + Component55, + Component56, + Component57, + Component58, + Component59, + Component60, + Component61, + Component62, + Component63, + Component64, + Component65, + Component66, + Component67, + Component68, + Component69, + Component70, + Component71, + Component72, + Component73, + Component74, + Component75, + Component76, + Component77, + Component78, + Component79, + Component80, + Component81, + Component82, + Component83, + Component84, + Component85, + Component86, + Component87, + Component88, + Component89, + Component90, + Component91, + Component92, + Component93, + Component94, + Component95, + Component96, + Component97, + Component98, + Component99, + Component100, + Component101, + Component102, + Component103, + Component104, + Component105, + Component106, + Component107, + Component108, + Component109, + Component110, + Component111, + Component112, + Component113, + Component114, + Component115, + Component116, + Component117, + Component118, + Component119, +}; +static_assert(static_cast(Patch::Component119) == 127); + +[[nodiscard]] bool IsGeneric(Patch patch) noexcept; + +[[nodiscard]] u32 GenericPatchIndex(Patch patch); + +[[nodiscard]] u32 GenericPatchElement(Patch patch); + +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h index 9a32ca1e8..8b3b33852 100644 --- a/src/shader_recompiler/frontend/ir/type.h +++ b/src/shader_recompiler/frontend/ir/type.h @@ -20,26 +20,27 @@ enum class Type { Reg = 1 << 2, Pred = 1 << 3, Attribute = 1 << 4, - U1 = 1 << 5, - U8 = 1 << 6, - U16 = 1 << 7, - U32 = 1 << 8, - U64 = 1 << 9, - F16 = 1 << 10, - F32 = 1 << 11, - F64 = 1 << 12, - U32x2 = 1 << 13, - U32x3 = 1 << 14, - U32x4 = 1 << 15, - F16x2 = 1 << 16, - F16x3 = 1 << 17, - F16x4 = 1 << 18, - F32x2 = 1 << 19, - F32x3 = 1 << 20, - F32x4 = 1 << 21, - F64x2 = 1 << 22, - F64x3 = 1 << 23, - F64x4 = 1 << 24, + Patch = 1 << 5, + U1 = 1 << 6, + U8 = 1 << 7, + U16 = 1 << 8, + U32 = 1 << 9, + U64 = 1 << 10, + F16 = 1 << 11, + F32 = 1 << 12, + F64 = 1 << 13, + U32x2 = 1 << 14, + U32x3 = 1 << 15, + U32x4 = 1 << 16, + F16x2 = 1 << 17, + F16x3 = 1 << 18, + F16x4 = 1 << 19, + F32x2 = 1 << 20, + F32x3 = 1 << 21, + F32x4 = 1 << 22, + F64x2 = 1 << 23, + F64x3 = 1 << 24, + F64x4 = 1 << 25, }; DECLARE_ENUM_FLAG_OPERATORS(Type) diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index 1e7ffb86d..bf5f8c0c2 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -18,6 +18,8 @@ Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} +Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {} + Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} @@ -109,6 +111,11 @@ IR::Attribute Value::Attribute() const { return attribute; } +IR::Patch Value::Patch() const { + ValidateAccess(Type::Patch); + return patch; +} + bool Value::U1() const { if (IsIdentity()) { return inst->Arg(0).U1(); @@ -182,6 +189,8 @@ bool Value::operator==(const Value& other) const { return pred == other.pred; case Type::Attribute: return attribute == other.attribute; + case Type::Patch: + return patch == other.patch; case Type::U1: return imm_u1 == other.imm_u1; case Type::U8: diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index a0962863d..303745563 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -9,6 +9,7 @@ #include "shader_recompiler/frontend/ir/attribute.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/frontend/ir/type.h" namespace Shader::IR { @@ -24,6 +25,7 @@ public: explicit Value(IR::Reg value) noexcept; explicit Value(IR::Pred value) noexcept; explicit Value(IR::Attribute value) noexcept; + explicit Value(IR::Patch value) noexcept; explicit Value(bool value) noexcept; explicit Value(u8 value) noexcept; explicit Value(u16 value) noexcept; @@ -46,6 +48,7 @@ public: [[nodiscard]] IR::Reg Reg() const; [[nodiscard]] IR::Pred Pred() const; [[nodiscard]] IR::Attribute Attribute() const; + [[nodiscard]] IR::Patch Patch() const; [[nodiscard]] bool U1() const; [[nodiscard]] u8 U8() const; [[nodiscard]] u16 U16() const; @@ -67,6 +70,7 @@ private: IR::Reg reg; IR::Pred pred; IR::Attribute attribute; + IR::Patch patch; bool imm_u1; u8 imm_u8; u16 imm_u16; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ab67446c8..20a1d61cc 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -70,6 +70,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool size; } const ald{insn}; - if (ald.o != 0) { - throw NotImplementedException("O"); - } - if (ald.patch != 0) { - throw NotImplementedException("P"); - } const u64 offset{ald.absolute_offset.Value()}; if (offset % 4 != 0) { throw NotImplementedException("Unaligned absolute offset {}", offset); @@ -84,11 +78,19 @@ void TranslatorVisitor::ALD(u64 insn) { const u32 num_elements{NumElements(ald.size)}; if (ald.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::Attribute attr{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + if (ald.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetPatch(patch)); + } else { + const IR::Attribute attr{offset / 4 + element}; + F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + } } return; } + if (ald.patch != 0) { + throw NotImplementedException("Indirect patch read"); + } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); }); @@ -106,9 +108,6 @@ void TranslatorVisitor::AST(u64 insn) { BitField<47, 2, Size> size; } const ast{insn}; - if (ast.patch != 0) { - throw NotImplementedException("P"); - } if (ast.index_reg != IR::Reg::RZ) { throw NotImplementedException("Indexed store"); } @@ -120,11 +119,19 @@ void TranslatorVisitor::AST(u64 insn) { const u32 num_elements{NumElements(ast.size)}; if (ast.index_reg == IR::Reg::RZ) { for (u32 element = 0; element < num_elements; ++element) { - const IR::Attribute attr{offset / 4 + element}; - ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + if (ast.patch != 0) { + const IR::Patch patch{offset / 4 + element}; + ir.SetPatch(patch, F(ast.src_reg + element)); + } else { + const IR::Attribute attr{offset / 4 + element}; + ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + } } return; } + if (ast.patch != 0) { + throw NotImplementedException("Indexed tessellation patch store"); + } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); }); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index bc822d585..660b84c20 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -113,6 +113,8 @@ enum class SpecialRegister : u64 { [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { switch (special_register) { + case SpecialRegister::SR_INVOCATION_ID: + return ir.InvocationId(); case SpecialRegister::SR_THREAD_KILL: return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; case SpecialRegister::SR_INVOCATION_INFO: diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 617ec05ce..aadcf7999 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -53,6 +53,10 @@ void GetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::PointSpriteT: info.loads_point_coord = true; break; + case IR::Attribute::TessellationEvaluationPointU: + case IR::Attribute::TessellationEvaluationPointV: + info.loads_tess_coord = true; + break; default: throw NotImplementedException("Get attribute {}", attribute); } @@ -94,6 +98,34 @@ void SetAttribute(Info& info, IR::Attribute attribute) { } } +void GetPatch(Info& info, IR::Patch patch) { + if (!IR::IsGeneric(patch)) { + throw NotImplementedException("Reading non-generic patch {}", patch); + } + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; +} + +void SetPatch(Info& info, IR::Patch patch) { + if (IR::IsGeneric(patch)) { + info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; + return; + } + switch (patch) { + case IR::Patch::TessellationLodLeft: + case IR::Patch::TessellationLodTop: + case IR::Patch::TessellationLodRight: + case IR::Patch::TessellationLodBottom: + info.stores_tess_level_outer = true; + break; + case IR::Patch::TessellationLodInteriorU: + case IR::Patch::TessellationLodInteriorV: + info.stores_tess_level_inner = true; + break; + default: + throw NotImplementedException("Set patch {}", patch); + } +} + void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::CompositeConstructF16x2: @@ -350,6 +382,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::SetAttribute: SetAttribute(info, inst.Arg(0).Attribute()); break; + case IR::Opcode::GetPatch: + GetPatch(info, inst.Arg(0).Patch()); + break; + case IR::Opcode::SetPatch: + SetPatch(info, inst.Arg(0).Patch()); + break; case IR::Opcode::GetAttributeIndexed: info.loads_indexed_attributes = true; break; @@ -368,6 +406,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::InvocationId: + info.uses_invocation_id = true; + break; case IR::Opcode::IsHelperInvocation: info.uses_is_helper_invocation = true; break; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c26017d75..3a04f075e 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -38,6 +38,18 @@ enum class CompareFunction { Always, }; +enum class TessPrimitive { + Isolines, + Triangles, + Quads, +}; + +enum class TessSpacing { + Equal, + FractionalOdd, + FractionalEven, +}; + struct TransformFeedbackVarying { u32 buffer{}; u32 stride{}; @@ -74,6 +86,10 @@ struct Profile { bool convert_depth_mode{}; bool force_early_z{}; + TessPrimitive tess_primitive{}; + TessSpacing tess_spacing{}; + bool tess_clockwise{}; + InputTopology input_topology{}; std::optional fixed_state_point_size; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 336c6131a..4dbf9ed12 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -101,8 +101,10 @@ struct Info { bool uses_workgroup_id{}; bool uses_local_invocation_id{}; + bool uses_invocation_id{}; bool uses_is_helper_invocation{}; bool uses_subgroup_invocation_id{}; + std::array uses_patches{}; std::array input_generics{}; bool loads_position{}; @@ -110,6 +112,7 @@ struct Info { bool loads_vertex_id{}; bool loads_front_face{}; bool loads_point_coord{}; + bool loads_tess_coord{}; bool loads_indexed_attributes{}; std::array stores_frag_color{}; @@ -120,6 +123,8 @@ struct Info { bool stores_clip_distance{}; bool stores_layer{}; bool stores_viewport_index{}; + bool stores_tess_level_outer{}; + bool stores_tess_level_inner{}; bool stores_indexed_attributes{}; bool uses_fp16{}; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index dc4ff0da2..8f0b0b8ec 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -685,6 +685,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) { return {}; } +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) { + switch (polygon_mode) { + case Maxwell::PolygonMode::Point: + return VK_POLYGON_MODE_POINT; + case Maxwell::PolygonMode::Line: + return VK_POLYGON_MODE_LINE; + case Maxwell::PolygonMode::Fill: + return VK_POLYGON_MODE_FILL; + } + UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode); + return {}; +} + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { switch (swizzle) { case Tegra::Texture::SwizzleSource::Zero: diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 9f78e15b6..50a599c11 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -65,6 +65,8 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face); VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); +VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode); + VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 84720a6f9..d5e9dae0f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -355,7 +355,8 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), - .polygonMode = VK_POLYGON_MODE_FILL, + .polygonMode = + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)), .cullMode = static_cast( dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ee22255bf..0bccc640a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1040,6 +1040,36 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), &CastAttributeType); break; + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... + profile.tess_clockwise = key.state.tessellation_clockwise == 0; + profile.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + profile.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; case Shader::Stage::Geometry: if (program.output_topology == Shader::OutputTopology::PointList) { profile.fixed_state_point_size = point_size; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 0412b5234..555b12ed7 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem .flags = 0, .size = STREAM_BUFFER_SIZE, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 87cfe6312..f0de19ba1 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -225,7 +225,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .drawIndirectFirstInstance = false, .depthClamp = true, .depthBiasClamp = true, - .fillModeNonSolid = false, + .fillModeNonSolid = true, .depthBounds = false, .wideLines = false, .largePoints = true, @@ -670,6 +670,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.largePoints, "largePoints"), std::make_pair(features.multiViewport, "multiViewport"), std::make_pair(features.depthBiasClamp, "depthBiasClamp"), + std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), -- cgit v1.2.3 From e3514bcd6b09f623da14c4f3c4ffd988e75577ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 16:31:15 -0300 Subject: spirv: Implement ViewportMask with NV_viewport_array2 --- src/shader_recompiler/backend/spirv/emit_context.cpp | 4 ++++ src/shader_recompiler/backend/spirv/emit_context.h | 2 ++ src/shader_recompiler/backend/spirv/emit_spirv.cpp | 4 ++++ src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 5 +++++ src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 3 +++ src/shader_recompiler/profile.h | 1 + src/shader_recompiler/shader_info.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 5 +++++ src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 10 files changed, 32 insertions(+) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3946dab14..2f8678b4e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -457,6 +457,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); + output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); if (info.uses_int8) { AddCapability(spv::Capability::Int8); @@ -1131,6 +1132,9 @@ void EmitContext::DefineOutputs(const IR::Program& program) { } viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } + if (info.stores_viewport_mask && profile.support_viewport_mask) { + viewport_mask = DefineOutput(*this, TypeArray(U32[1], Constant(U32[1], 1u)), std::nullopt); + } for (size_t index = 0; index < info.stores_generics.size(); ++index) { if (info.stores_generics[index]) { DefineGenericOutput(*this, index, invocations); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index c7d6f8a38..c41cad098 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -134,6 +134,7 @@ public: Id input_s32{}; Id output_f32{}; + Id output_u32{}; Id image_buffer_type{}; Id sampled_texture_buffer_type{}; @@ -167,6 +168,7 @@ public: Id clip_distances{}; Id layer{}; Id viewport_index{}; + Id viewport_mask{}; Id primitive_id{}; Id fswzadd_lut_a{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 105602ccf..90c4833a8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -303,6 +303,10 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.stores_viewport_index) { ctx.AddCapability(spv::Capability::MultiViewport); } + if (info.stores_viewport_mask && profile.support_viewport_mask) { + ctx.AddExtension("SPV_NV_viewport_array2"); + ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); + } if (info.stores_layer || info.stores_viewport_index) { if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f3de577f6..ca067f1c4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -99,6 +99,11 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { ctx.stage == Shader::Stage::Geometry ? std::optional{ctx.viewport_index} : std::nullopt; + case IR::Attribute::ViewportMask: + if (!ctx.profile.support_viewport_mask) { + return std::nullopt; + } + return ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value); default: throw NotImplementedException("Read attribute {}", attr); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index c84bf211f..9631a445e 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -96,6 +96,9 @@ void SetAttribute(Info& info, IR::Attribute attribute) { case IR::Attribute::ViewportIndex: info.stores_viewport_index = true; break; + case IR::Attribute::ViewportMask: + info.stores_viewport_mask = true; + break; default: throw NotImplementedException("Set attribute {}", attribute); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3a04f075e..a2c2948d5 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -75,6 +75,7 @@ struct Profile { bool support_explicit_workgroup_layout{}; bool support_vote{}; bool support_viewport_index_layer_non_geometry{}; + bool support_viewport_mask{}; bool support_typeless_image_loads{}; bool warp_size_potentially_larger_than_guest{}; bool support_int64_atomics{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d6cde1596..d33df8aad 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -124,6 +124,7 @@ struct Info { bool stores_clip_distance{}; bool stores_layer{}; bool stores_viewport_index{}; + bool stores_viewport_mask{}; bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; bool stores_indexed_attributes{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0bccc640a..4d0d3ebb7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -690,6 +690,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, .support_vote = true, .support_viewport_index_layer_non_geometry = device.IsExtShaderViewportIndexLayerSupported(), + .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f0de19ba1..72b83f99a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -346,6 +346,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); } + if (!nv_viewport_array2) { + LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -724,6 +728,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { } }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); + test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4e6d13308..4415558bb 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -169,6 +169,11 @@ public: return nv_viewport_swizzle; } + /// Returns true if the device supports VK_NV_viewport_array2. + bool IsNvViewportArray2Supported() const { + return nv_viewport_array2; + } + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -312,6 +317,7 @@ private: bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. + bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. -- cgit v1.2.3 From 95815a3883d708f71db5119f42243e183f32f9a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 16 Apr 2021 17:22:59 -0300 Subject: shader: Implement PIXLD.MY_INDEX --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/spirv/emit_context.cpp | 3 ++ src/shader_recompiler/backend/spirv/emit_context.h | 1 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 3 ++ src/shader_recompiler/backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 4 ++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../maxwell/translate/impl/not_implemented.cpp | 4 -- .../frontend/maxwell/translate/impl/pixel_load.cpp | 46 ++++++++++++++++++++++ .../ir_opt/collect_shader_info_pass.cpp | 3 ++ src/shader_recompiler/shader_info.h | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 3 +- 14 files changed, 71 insertions(+), 5 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 7c11d15bf..07963a760 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -137,6 +137,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/impl/not_implemented.cpp frontend/maxwell/translate/impl/output_geometry.cpp + frontend/maxwell/translate/impl/pixel_load.cpp frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2f8678b4e..0b4abeb44 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -998,6 +998,9 @@ void EmitContext::DefineInputs(const Info& info) { if (info.uses_invocation_id) { invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId); } + if (info.uses_sample_id) { + sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId); + } if (info.uses_is_helper_invocation) { is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index c41cad098..9d8340333 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -149,6 +149,7 @@ public: Id workgroup_id{}; Id local_invocation_id{}; Id invocation_id{}; + Id sample_id{}; Id is_helper_invocation{}; Id subgroup_local_invocation_id{}; Id subgroup_mask_eq{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 90c4833a8..9ec970706 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -335,6 +335,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_typeless_image_writes) { ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); } + if (info.uses_sample_id) { + ctx.AddCapability(spv::Capability::SampleRateShading); + } if (!ctx.profile.xfb_varyings.empty()) { ctx.AddCapability(spv::Capability::TransformFeedback); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 8caf30f1b..dfddf5e58 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -70,6 +70,7 @@ void EmitSetOFlag(EmitContext& ctx); Id EmitWorkgroupId(EmitContext& ctx); Id EmitLocalInvocationId(EmitContext& ctx); Id EmitInvocationId(EmitContext& ctx); +Id EmitSampleId(EmitContext& ctx); Id EmitIsHelperInvocation(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index aaa20ab95..7555dd94c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -391,6 +391,10 @@ Id EmitInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); } +Id EmitSampleId(EmitContext& ctx) { + return ctx.OpLoad(ctx.U32[1], ctx.sample_id); +} + Id EmitIsHelperInvocation(EmitContext& ctx) { return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b821d9f47..141efd86c 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -375,6 +375,10 @@ U32 IREmitter::InvocationId() { return Inst(Opcode::InvocationId); } +U32 IREmitter::SampleId() { + return Inst(Opcode::SampleId); +} + U1 IREmitter::IsHelperInvocation() { return Inst(Opcode::IsHelperInvocation); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 7f8f1ad42..81833d928 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -99,6 +99,7 @@ public: [[nodiscard]] U32 LocalInvocationIdZ(); [[nodiscard]] U32 InvocationId(); + [[nodiscard]] U32 SampleId(); [[nodiscard]] U1 IsHelperInvocation(); [[nodiscard]] U32 LaneId(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index a86542cd8..d5e443673 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -63,6 +63,7 @@ OPCODE(SetOFlag, Void, U1, OPCODE(WorkgroupId, U32x3, ) OPCODE(LocalInvocationId, U32x3, ) OPCODE(InvocationId, U32, ) +OPCODE(SampleId, U32, ) OPCODE(IsHelperInvocation, U1, ) // Undefined diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a45d1e4be..a4f99bbbe 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -181,10 +181,6 @@ void TranslatorVisitor::PEXIT(u64) { ThrowNotImplemented(Opcode::PEXIT); } -void TranslatorVisitor::PIXLD(u64) { - ThrowNotImplemented(Opcode::PIXLD); -} - void TranslatorVisitor::PLONGJMP(u64) { ThrowNotImplemented(Opcode::PLONGJMP); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + CovMask, + Covered, + Offset, + CentroidOffset, + MyIndex, +}; +} // Anonymous namespace + +void TranslatorVisitor::PIXLD(u64 insn) { + union { + u64 raw; + BitField<31, 3, Mode> mode; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> addr_reg; + BitField<20, 8, s64> addr_offset; + BitField<45, 3, IR::Pred> dest_pred; + } const pixld{insn}; + + if (pixld.dest_pred != IR::Pred::PT) { + throw NotImplementedException("Destination predicate"); + } + if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { + throw NotImplementedException("Non-zero source register"); + } + switch (pixld.mode) { + case Mode::MyIndex: + X(pixld.dest_reg, ir.SampleId()); + break; + default: + throw NotImplementedException("Mode {}", pixld.mode.Value()); + } +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 9631a445e..5d1310466 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -415,6 +415,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::InvocationId: info.uses_invocation_id = true; break; + case IR::Opcode::SampleId: + info.uses_sample_id = true; + break; case IR::Opcode::IsHelperInvocation: info.uses_is_helper_invocation = true; break; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d33df8aad..686f5c719 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -102,6 +102,7 @@ struct Info { bool uses_workgroup_id{}; bool uses_local_invocation_id{}; bool uses_invocation_id{}; + bool uses_sample_id{}; bool uses_is_helper_invocation{}; bool uses_subgroup_invocation_id{}; std::array uses_patches{}; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 72b83f99a..038231298 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -218,7 +218,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .independentBlend = true, .geometryShader = true, .tessellationShader = true, - .sampleRateShading = false, + .sampleRateShading = true, .dualSrcBlend = false, .logicOp = false, .multiDrawIndirect = false, @@ -677,6 +677,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), + std::make_pair(features.sampleRateShading, "sampleRateShading"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), -- cgit v1.2.3 From f18a6dd1bdaffda4c3e771af3cf7cf41919ebd67 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 16 Apr 2021 23:52:58 +0200 Subject: shader: Implement SR_Y_DIRECTION --- src/shader_recompiler/backend/spirv/emit_spirv.h | 1 + src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 7 +++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 ++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + .../frontend/maxwell/translate/impl/move_special_register.cpp | 2 ++ src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 2 ++ src/video_core/renderer_vulkan/fixed_pipeline_state.h | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 10 files changed, 22 insertions(+) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index d43c72f6e..7949d08d0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -72,6 +72,7 @@ Id EmitLocalInvocationId(EmitContext& ctx); Id EmitInvocationId(EmitContext& ctx); Id EmitSampleId(EmitContext& ctx); Id EmitIsHelperInvocation(EmitContext& ctx); +Id EmitYDirection(EmitContext& ctx); Id EmitLoadLocal(EmitContext& ctx, Id word_offset); void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); Id EmitUndefU1(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e5e4c352b..1030404c0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -403,6 +403,13 @@ Id EmitIsHelperInvocation(EmitContext& ctx) { return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); } +Id EmitYDirection(EmitContext& ctx) { + if (ctx.profile.y_negate) { + return ctx.Constant(ctx.F32[1], -1.0f); + } + return ctx.Constant(ctx.F32[1], 1.0f); +} + Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; return ctx.OpLoad(ctx.U32[1], pointer); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index aebe7200f..c3e8d0681 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -379,6 +379,10 @@ U1 IREmitter::IsHelperInvocation() { return Inst(Opcode::IsHelperInvocation); } +F32 IREmitter::YDirection() { + return Inst(Opcode::YDirection); +} + U32 IREmitter::LaneId() { return Inst(Opcode::LaneId); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index b9d051b43..7e67f5e30 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -102,6 +102,7 @@ public: [[nodiscard]] U32 InvocationId(); [[nodiscard]] U32 SampleId(); [[nodiscard]] U1 IsHelperInvocation(); + [[nodiscard]] F32 YDirection(); [[nodiscard]] U32 LaneId(); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 1cfc2a943..269de8ca5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -65,6 +65,7 @@ OPCODE(LocalInvocationId, U32x3, OPCODE(InvocationId, U32, ) OPCODE(SampleId, U32, ) OPCODE(IsHelperInvocation, U1, ) +OPCODE(YDirection, F32, ) // Undefined OPCODE(UndefU1, U1, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 660b84c20..b0baff74b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -150,6 +150,8 @@ enum class SpecialRegister : u64 { return ir.SubgroupGtMask(); case SpecialRegister::SR_GEMASK: return ir.SubgroupGeMask(); + case SpecialRegister::SR_Y_DIRECTION: + return ir.BitCast(ir.YDirection()); default: throw NotImplementedException("S2R special register {}", special_register); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index a2c2948d5..08242184f 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -97,6 +97,8 @@ struct Profile { std::optional alpha_test_func; float alpha_test_reference{}; + bool y_negate{}; + std::vector xfb_varyings; }; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 6a3baf837..24834e0f7 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -82,6 +82,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 5568c4f72..31de6b2c8 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -202,6 +202,7 @@ struct FixedPipelineState { BitField<3, 1, u32> early_z; BitField<4, 1, u32> depth_enabled; BitField<5, 5, u32> depth_format; + BitField<10, 1, u32> y_negate; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4d0d3ebb7..e9b93336b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1116,6 +1116,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, break; } profile.force_early_z = key.state.early_z != 0; + profile.y_negate = key.state.y_negate != 0; return profile; } -- cgit v1.2.3 From dd860b684c7695097107c1186e96a70e754e5990 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Apr 2021 19:48:45 -0300 Subject: shader: Implement D3D samplers --- src/shader_recompiler/environment.h | 2 +- src/shader_recompiler/ir_opt/texture_pass.cpp | 80 +++++++++++++++++++--- src/shader_recompiler/shader_info.h | 6 ++ .../renderer_vulkan/vk_compute_pipeline.cpp | 31 ++++++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 25 +++++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 32 ++++----- 6 files changed, 127 insertions(+), 49 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 1c50ae51e..090bc1c08 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -17,7 +17,7 @@ public: [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0; - [[nodiscard]] virtual TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) = 0; + [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0; [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index e1d5a2ce1..5ac485522 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -19,6 +19,9 @@ namespace { struct ConstBufferAddr { u32 index; u32 offset; + u32 secondary_index; + u32 secondary_offset; + bool has_secondary; }; struct TextureInst { @@ -109,9 +112,38 @@ bool IsTextureInstruction(const IR::Inst& inst) { return IndexedInstruction(inst) != IR::Opcode::Void; } +std::optional TryGetConstBuffer(const IR::Inst* inst); + +std::optional Track(const IR::Value& value) { + return IR::BreadthFirstSearch(value, TryGetConstBuffer); +} + std::optional TryGetConstBuffer(const IR::Inst* inst) { - if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { + switch (inst->GetOpcode()) { + default: return std::nullopt; + case IR::Opcode::BitwiseOr32: { + std::optional lhs{Track(inst->Arg(0))}; + std::optional rhs{Track(inst->Arg(1))}; + if (!lhs || !rhs) { + return std::nullopt; + } + if (lhs->has_secondary || rhs->has_secondary) { + return std::nullopt; + } + if (lhs->index > rhs->index || lhs->offset > rhs->offset) { + std::swap(lhs, rhs); + } + return ConstBufferAddr{ + .index = lhs->index, + .offset = lhs->offset, + .secondary_index = rhs->index, + .secondary_offset = rhs->offset, + .has_secondary = true, + }; + } + case IR::Opcode::GetCbufU32: + break; } const IR::Value index{inst->Arg(0)}; const IR::Value offset{inst->Arg(1)}; @@ -127,13 +159,12 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { return ConstBufferAddr{ .index{index.U32()}, .offset{offset.U32()}, + .secondary_index = 0, + .secondary_offset = 0, + .has_secondary = false, }; } -std::optional Track(const IR::Value& value) { - return IR::BreadthFirstSearch(value, TryGetConstBuffer); -} - TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { ConstBufferAddr addr; if (IsBindless(inst)) { @@ -146,6 +177,9 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { addr = ConstBufferAddr{ .index = env.TextureBoundBuffer(), .offset = inst.Arg(0).U32(), + .secondary_index = 0, + .secondary_offset = 0, + .has_secondary = false, }; } return TextureInst{ @@ -155,6 +189,14 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { }; } +TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { + const u32 secondary_index{cbuf.has_secondary ? cbuf.index : cbuf.secondary_index}; + const u32 secondary_offset{cbuf.has_secondary ? cbuf.offset : cbuf.secondary_offset}; + const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; + const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; + return env.ReadTextureType(lhs_raw | rhs_raw); +} + class Descriptors { public: explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, @@ -167,8 +209,11 @@ public: u32 Add(const TextureBufferDescriptor& desc) { return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset; + return desc.has_secondary == existing.has_secondary && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; }); } @@ -181,8 +226,12 @@ public: u32 Add(const TextureDescriptor& desc) { return Add(texture_descriptors, desc, [&desc](const auto& existing) { - return desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset && desc.type == existing.type; + return desc.type == existing.type && desc.is_depth == existing.is_depth && + desc.has_secondary == existing.has_secondary && + desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && + desc.secondary_cbuf_index == existing.secondary_cbuf_index && + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; }); } @@ -247,14 +296,14 @@ void TexturePass(Environment& env, IR::Program& program) { auto flags{inst->Flags()}; switch (inst->GetOpcode()) { case IR::Opcode::ImageQueryDimensions: - flags.type.Assign(env.ReadTextureType(cbuf.index, cbuf.offset)); + flags.type.Assign(ReadTextureType(env, cbuf)); inst->SetFlags(flags); break; case IR::Opcode::ImageFetch: if (flags.type != TextureType::Color1D) { break; } - if (env.ReadTextureType(cbuf.index, cbuf.offset) == TextureType::Buffer) { + if (ReadTextureType(env, cbuf) == TextureType::Buffer) { // Replace with the bound texture type only when it's a texture buffer // If the instruction is 1D and the bound type is 2D, don't change the code and let // the rasterizer robustness handle it @@ -270,6 +319,9 @@ void TexturePass(Environment& env, IR::Program& program) { switch (inst->GetOpcode()) { case IR::Opcode::ImageRead: case IR::Opcode::ImageWrite: { + if (cbuf.has_secondary) { + throw NotImplementedException("Unexpected separate sampler"); + } const bool is_written{inst->GetOpcode() == IR::Opcode::ImageWrite}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ @@ -294,16 +346,22 @@ void TexturePass(Environment& env, IR::Program& program) { default: if (flags.type == TextureType::Buffer) { index = descriptors.Add(TextureBufferDescriptor{ + .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, + .secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, .count = 1, }); } else { index = descriptors.Add(TextureDescriptor{ .type = flags.type, .is_depth = flags.is_depth != 0, + .has_secondary = cbuf.has_secondary, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, + .secondary_cbuf_index = cbuf.secondary_index, + .secondary_cbuf_offset = cbuf.secondary_offset, .count = 1, }); } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 50b4d1c05..0f45bdfb6 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -61,8 +61,11 @@ struct StorageBufferDescriptor { }; struct TextureBufferDescriptor { + bool has_secondary; u32 cbuf_index; u32 cbuf_offset; + u32 secondary_cbuf_index; + u32 secondary_cbuf_offset; u32 count; }; using TextureBufferDescriptors = boost::container::small_vector; @@ -79,8 +82,11 @@ using ImageBufferDescriptors = boost::container::small_vector; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3c907ec5a..45d837ca4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -88,23 +88,34 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, boost::container::static_vector image_view_indices; boost::container::static_vector samplers; - const auto& launch_desc{kepler_compute.launch_description}; - const auto& cbufs{launch_desc.const_buffer_config}; - const bool via_header_index{launch_desc.linked_tsc}; - const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { - ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); - const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset}; - const u32 raw_handle{gpu_memory.Read(addr)}; - return TextureHandle(raw_handle, via_header_index); + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + desc.secondary_cbuf_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TextureHandle{raw, via_header_index}; + } + } + return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d5e9dae0f..08f00b9ce 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -169,20 +169,31 @@ void GraphicsPipeline::Configure(bool is_indexed) { ++index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](u32 cbuf_index, u32 cbuf_offset) { - ASSERT(cbufs[cbuf_index].enabled); - const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; - const u32 raw_handle{gpu_memory.Read(addr)}; - return TextureHandle(raw_handle, via_header_index); + const auto read_handle{[&](const auto& desc) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const GPUVAddr addr{cbufs[desc.cbuf_index].address + desc.cbuf_offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + desc.secondary_cbuf_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TextureHandle{raw, via_header_index}; + } + } + return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + const TextureHandle handle{read_handle(desc)}; image_view_indices.push_back(handle.image); Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e9b93336b..4317b2ac7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -188,9 +188,7 @@ protected: } Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - GPUVAddr cbuf_addr, u32 cbuf_size, u32 cbuf_index, - u32 cbuf_offset) { - const u32 raw{cbuf_offset < cbuf_size ? gpu_memory->Read(cbuf_addr + cbuf_offset) : 0}; + u32 raw) { const TextureHandle handle{raw, via_header_index}; const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; @@ -219,7 +217,7 @@ protected: throw Shader::NotImplementedException("Unknown texture type"); } }()}; - texture_types.emplace(MakeCbufKey(cbuf_index, cbuf_offset), result); + texture_types.emplace(raw, result); return result; } @@ -227,7 +225,7 @@ protected: GPUVAddr program_base{}; std::vector code; - std::unordered_map texture_types; + std::unordered_map texture_types; std::unordered_map cbuf_values; u32 local_memory_size{}; @@ -250,7 +248,7 @@ using Shader::Maxwell::TranslateProgram; // TODO: Move this to a separate file constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION{1}; +constexpr u32 CACHE_VERSION{2}; class GraphicsEnvironment final : public GenericEnvironment { public: @@ -308,13 +306,10 @@ public: return value; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + Shader::TextureType ReadTextureType(u32 handle) override { const auto& regs{maxwell3d->regs}; - const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; - ASSERT(cbuf.enabled); const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, - cbuf.address, cbuf.size, cbuf_index, cbuf_offset); + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); } private: @@ -352,13 +347,10 @@ public: return value; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { + Shader::TextureType ReadTextureType(u32 handle) override { const auto& regs{kepler_compute->regs}; const auto& qmd{kepler_compute->launch_description}; - ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); - const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, - cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); } private: @@ -421,7 +413,7 @@ public: code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); file.read(reinterpret_cast(code.get()), code_size); for (size_t i = 0; i < num_texture_types; ++i) { - u64 key; + u32 key; Shader::TextureType type; file.read(reinterpret_cast(&key), sizeof(key)) .read(reinterpret_cast(&type), sizeof(type)); @@ -457,8 +449,8 @@ public: return it->second; } - Shader::TextureType ReadTextureType(u32 cbuf_index, u32 cbuf_offset) override { - const auto it{texture_types.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + Shader::TextureType ReadTextureType(u32 handle) override { + const auto it{texture_types.find(handle)}; if (it == texture_types.end()) { throw Shader::LogicError("Uncached read texture type"); } @@ -483,7 +475,7 @@ public: private: std::unique_ptr code; - std::unordered_map texture_types; + std::unordered_map texture_types; std::unordered_map cbuf_values; std::array workgroup_size{}; u32 local_memory_size{}; -- cgit v1.2.3 From 050e81500c002f304d581f28700de549b828a2bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 21 Apr 2021 00:35:47 -0300 Subject: shader: Move microinstruction header to the value header --- src/shader_recompiler/CMakeLists.txt | 1 - src/shader_recompiler/backend/spirv/emit_spirv.cpp | 1 - src/shader_recompiler/backend/spirv/emit_spirv.h | 1 - src/shader_recompiler/frontend/ir/basic_block.h | 1 - .../frontend/ir/breadth_first_search.h | 1 - .../frontend/ir/microinstruction.cpp | 2 +- .../frontend/ir/microinstruction.h | 162 --------------------- src/shader_recompiler/frontend/ir/program.cpp | 2 +- src/shader_recompiler/frontend/ir/value.cpp | 1 - src/shader_recompiler/frontend/ir/value.h | 151 ++++++++++++++++++- .../frontend/maxwell/structured_control_flow.h | 2 +- .../ir_opt/collect_shader_info_pass.cpp | 2 +- .../ir_opt/constant_propagation_pass.cpp | 2 +- .../ir_opt/dead_code_elimination_pass.cpp | 2 +- .../global_memory_to_storage_buffer_pass.cpp | 2 +- .../ir_opt/identity_removal_pass.cpp | 2 +- .../ir_opt/lower_fp16_to_fp32.cpp | 2 +- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 2 +- src/shader_recompiler/ir_opt/verification_pass.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 2 +- 20 files changed, 162 insertions(+), 181 deletions(-) delete mode 100644 src/shader_recompiler/frontend/ir/microinstruction.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 07963a760..3d7506de2 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -36,7 +36,6 @@ add_library(shader_recompiler STATIC frontend/ir/ir_emitter.cpp frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp - frontend/ir/microinstruction.h frontend/ir/modifiers.h frontend/ir/opcodes.cpp frontend/ir/opcodes.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 5d6fdeb65..815b3cd95 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -10,7 +10,6 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 89a82e858..4562db45b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -8,7 +8,6 @@ #include "common/common_types.h" #include "shader_recompiler/backend/spirv/emit_context.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 3a4230755..ab7ddb3d5 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -13,7 +13,6 @@ #include "common/bit_cast.h" #include "shader_recompiler/frontend/ir/condition.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/object_pool.h" diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h index b35f062d4..a52ccbd58 100644 --- a/src/shader_recompiler/frontend/ir/breadth_first_search.h +++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h @@ -10,7 +10,6 @@ #include -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 41f9fa0cd..701746a0c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -6,8 +6,8 @@ #include #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { namespace { diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h deleted file mode 100644 index ea55fc29c..000000000 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include - -#include -#include - -#include "common/bit_cast.h" -#include "common/common_types.h" -#include "shader_recompiler/frontend/ir/opcodes.h" -#include "shader_recompiler/frontend/ir/type.h" -#include "shader_recompiler/frontend/ir/value.h" - -namespace Shader::IR { - -class Block; - -struct AssociatedInsts; - -class Inst : public boost::intrusive::list_base_hook<> { -public: - explicit Inst(Opcode op_, u32 flags_) noexcept; - ~Inst(); - - Inst& operator=(const Inst&) = delete; - Inst(const Inst&) = delete; - - Inst& operator=(Inst&&) = delete; - Inst(Inst&&) = delete; - - /// Get the number of uses this instruction has. - [[nodiscard]] int UseCount() const noexcept { - return use_count; - } - - /// Determines whether this instruction has uses or not. - [[nodiscard]] bool HasUses() const noexcept { - return use_count > 0; - } - - /// Get the opcode this microinstruction represents. - [[nodiscard]] IR::Opcode GetOpcode() const noexcept { - return op; - } - - /// Determines if there is a pseudo-operation associated with this instruction. - [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { - return associated_insts != nullptr; - } - - /// Determines whether or not this instruction may have side effects. - [[nodiscard]] bool MayHaveSideEffects() const noexcept; - - /// Determines whether or not this instruction is a pseudo-instruction. - /// Pseudo-instructions depend on their parent instructions for their semantics. - [[nodiscard]] bool IsPseudoInstruction() const noexcept; - - /// Determines if all arguments of this instruction are immediates. - [[nodiscard]] bool AreAllArgsImmediates() const; - - /// Gets a pseudo-operation associated with this instruction - [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); - - /// Get the type this instruction returns. - [[nodiscard]] IR::Type Type() const; - - /// Get the number of arguments this instruction has. - [[nodiscard]] size_t NumArgs() const { - return op == Opcode::Phi ? phi_args.size() : NumArgsOf(op); - } - - /// Get the value of a given argument index. - [[nodiscard]] Value Arg(size_t index) const noexcept { - if (op == Opcode::Phi) { - return phi_args[index].second; - } else { - return args[index]; - } - } - - /// Set the value of a given argument index. - void SetArg(size_t index, Value value); - - /// Get a pointer to the block of a phi argument. - [[nodiscard]] Block* PhiBlock(size_t index) const; - /// Add phi operand to a phi instruction. - void AddPhiOperand(Block* predecessor, const Value& value); - - void Invalidate(); - void ClearArgs(); - - void ReplaceUsesWith(Value replacement); - - void ReplaceOpcode(IR::Opcode opcode); - - template - requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) - [[nodiscard]] FlagsType Flags() const noexcept { - FlagsType ret; - std::memcpy(reinterpret_cast(&ret), &flags, sizeof(ret)); - return ret; - } - - template - requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) - [[nodiscard]] void SetFlags(FlagsType value) noexcept { - std::memcpy(&flags, &value, sizeof(value)); - } - - /// Intrusively store the host definition of this instruction. - template - void SetDefinition(DefinitionType def) { - definition = Common::BitCast(def); - } - - /// Return the intrusively stored host definition of this instruction. - template - [[nodiscard]] DefinitionType Definition() const noexcept { - return Common::BitCast(definition); - } - -private: - struct NonTriviallyDummy { - NonTriviallyDummy() noexcept {} - }; - - void Use(const Value& value); - void UndoUse(const Value& value); - - IR::Opcode op{}; - int use_count{}; - u32 flags{}; - u32 definition{}; - union { - NonTriviallyDummy dummy{}; - boost::container::small_vector, 2> phi_args; - std::array args; - }; - std::unique_ptr associated_insts; -}; -static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); - -struct AssociatedInsts { - union { - Inst* in_bounds_inst; - Inst* sparse_inst; - Inst* zero_inst{}; - }; - Inst* sign_inst{}; - Inst* carry_inst{}; - Inst* overflow_inst{}; -}; - -} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 89a17fb1b..3fc06f855 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -8,8 +8,8 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index bf5f8c0c2..a8a919e0e 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/value.h" diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index 303745563..d90a68b37 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h @@ -4,19 +4,34 @@ #pragma once +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/bit_cast.h" #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/attribute.h" +#include "shader_recompiler/frontend/ir/opcodes.h" +#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" -#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/frontend/ir/value.h" namespace Shader::IR { class Block; class Inst; +struct AssociatedInsts; + class Value { public: Value() noexcept : type{IR::Type::Void}, inst{nullptr} {} @@ -101,6 +116,140 @@ public: explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; +class Inst : public boost::intrusive::list_base_hook<> { +public: + explicit Inst(IR::Opcode op_, u32 flags_) noexcept; + ~Inst(); + + Inst& operator=(const Inst&) = delete; + Inst(const Inst&) = delete; + + Inst& operator=(Inst&&) = delete; + Inst(Inst&&) = delete; + + /// Get the number of uses this instruction has. + [[nodiscard]] int UseCount() const noexcept { + return use_count; + } + + /// Determines whether this instruction has uses or not. + [[nodiscard]] bool HasUses() const noexcept { + return use_count > 0; + } + + /// Get the opcode this microinstruction represents. + [[nodiscard]] IR::Opcode GetOpcode() const noexcept { + return op; + } + + /// Determines if there is a pseudo-operation associated with this instruction. + [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { + return associated_insts != nullptr; + } + + /// Determines whether or not this instruction may have side effects. + [[nodiscard]] bool MayHaveSideEffects() const noexcept; + + /// Determines whether or not this instruction is a pseudo-instruction. + /// Pseudo-instructions depend on their parent instructions for their semantics. + [[nodiscard]] bool IsPseudoInstruction() const noexcept; + + /// Determines if all arguments of this instruction are immediates. + [[nodiscard]] bool AreAllArgsImmediates() const; + + /// Gets a pseudo-operation associated with this instruction + [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); + + /// Get the type this instruction returns. + [[nodiscard]] IR::Type Type() const; + + /// Get the number of arguments this instruction has. + [[nodiscard]] size_t NumArgs() const { + return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op); + } + + /// Get the value of a given argument index. + [[nodiscard]] Value Arg(size_t index) const noexcept { + if (op == IR::Opcode::Phi) { + return phi_args[index].second; + } else { + return args[index]; + } + } + + /// Set the value of a given argument index. + void SetArg(size_t index, Value value); + + /// Get a pointer to the block of a phi argument. + [[nodiscard]] Block* PhiBlock(size_t index) const; + /// Add phi operand to a phi instruction. + void AddPhiOperand(Block* predecessor, const Value& value); + + void Invalidate(); + void ClearArgs(); + + void ReplaceUsesWith(Value replacement); + + void ReplaceOpcode(IR::Opcode opcode); + + template + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) + [[nodiscard]] FlagsType Flags() const noexcept { + FlagsType ret; + std::memcpy(reinterpret_cast(&ret), &flags, sizeof(ret)); + return ret; + } + + template + requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) + [[nodiscard]] void SetFlags(FlagsType value) noexcept { + std::memcpy(&flags, &value, sizeof(value)); + } + + /// Intrusively store the host definition of this instruction. + template + void SetDefinition(DefinitionType def) { + definition = Common::BitCast(def); + } + + /// Return the intrusively stored host definition of this instruction. + template + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast(definition); + } + +private: + struct NonTriviallyDummy { + NonTriviallyDummy() noexcept {} + }; + + void Use(const Value& value); + void UndoUse(const Value& value); + + IR::Opcode op{}; + int use_count{}; + u32 flags{}; + u32 definition{}; + union { + NonTriviallyDummy dummy{}; + boost::container::small_vector, 2> phi_args; + std::array args; + }; + std::unique_ptr associated_insts; +}; +static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); + +struct AssociatedInsts { + union { + Inst* in_bounds_inst; + Inst* sparse_inst; + Inst* zero_inst{}; + }; + Inst* sign_inst{}; + Inst* carry_inst{}; + Inst* overflow_inst{}; +}; + using U1 = TypedValue; using U8 = TypedValue; using U16 = TypedValue; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index e4797291e..a6be12ba2 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h @@ -11,7 +11,7 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index cccf0909d..bb4aeb57c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -3,9 +3,9 @@ // Refer to the license.txt file included. #include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" #include "shader_recompiler/shader_info.h" diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 2206f93c2..770d3de61 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -11,7 +11,7 @@ #include "common/bit_util.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp index 8ad59f42e..f9c5334b5 100644 --- a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp @@ -5,7 +5,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index f294d297f..87eca2a0d 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -15,7 +15,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/breadth_first_search.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp index 38af72dfe..6afbe24f7 100644 --- a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp @@ -5,7 +5,7 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 62e73d52d..773e1f961 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -5,7 +5,7 @@ #include #include "shader_recompiler/frontend/ir/ir_emitter.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index bb1a90004..fe86a164b 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -23,10 +23,10 @@ #include #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/opcodes.h" #include "shader_recompiler/frontend/ir/pred.h" #include "shader_recompiler/frontend/ir/reg.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp index dbec96d84..62bf5f8ff 100644 --- a/src/shader_recompiler/ir_opt/verification_pass.cpp +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp @@ -6,7 +6,7 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Optimization { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 8b6839966..e12e4422f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -16,7 +16,7 @@ #include "common/common_types.h" #include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" -- cgit v1.2.3 From d10cf55353175b13bed4cf18791e080ecb7fd95b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:17:59 -0300 Subject: shader: Implement indexed textures --- .../backend/spirv/emit_context.cpp | 79 +++++++++++-------- src/shader_recompiler/backend/spirv/emit_context.h | 11 ++- .../backend/spirv/emit_spirv_image.cpp | 58 ++++++++------ src/shader_recompiler/frontend/ir/modifiers.h | 17 +++-- src/shader_recompiler/frontend/ir/opcodes.inc | 24 +++--- src/shader_recompiler/ir_opt/texture_pass.cpp | 89 +++++++++++++++++----- src/shader_recompiler/shader_info.h | 4 + src/video_core/renderer_vulkan/pipeline_helper.h | 50 +++++++----- .../renderer_vulkan/vk_compute_pipeline.cpp | 46 ++++++----- .../renderer_vulkan/vk_graphics_pipeline.cpp | 63 +++++++++------ 10 files changed, 284 insertions(+), 157 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 7f16cb0dc..8e625f8fb 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -380,6 +380,24 @@ Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_p ctx.OpFunctionEnd(); return func; } + +template +std::string NameOf(const Desc& desc, std::string_view prefix) { + if (desc.count > 1) { + return fmt::format("{}{}_{:02x}x{}", prefix, desc.cbuf_index, desc.cbuf_offset, desc.count); + } else { + return fmt::format("{}{}_{:02x}", prefix, desc.cbuf_index, desc.cbuf_offset); + } +} + +Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { + if (count > 1) { + const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))}; + return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type); + } else { + return pointer_type; + } +} } // Anonymous namespace void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { @@ -971,12 +989,15 @@ void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - texture_buffers.insert(texture_buffers.end(), desc.count, id); + Name(id, NameOf(desc, "texbuf")); + texture_buffers.push_back({ + .id = id, + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } @@ -992,44 +1013,41 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("imgbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - const ImageBufferDefinition def{ + Name(id, NameOf(desc, "imgbuf")); + image_buffers.push_back({ .id = id, .image_type = image_type, - }; - image_buffers.insert(image_buffers.end(), desc.count, def); + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } void EmitContext::DefineTextures(const Info& info, u32& binding) { textures.reserve(info.texture_descriptors.size()); for (const TextureDescriptor& desc : info.texture_descriptors) { - if (desc.count != 1) { - throw NotImplementedException("Array of textures"); - } const Id image_type{ImageType(*this, desc)}; const Id sampled_type{TypeSampledImage(image_type)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)}; - const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; + const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)}; + const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("tex{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - for (u32 index = 0; index < desc.count; ++index) { - // TODO: Pass count info - textures.push_back(TextureDefinition{ - .id{id}, - .sampled_type{sampled_type}, - .image_type{image_type}, - }); - } + Name(id, NameOf(desc, "tex")); + textures.push_back({ + .id = id, + .sampled_type = sampled_type, + .pointer_type = pointer_type, + .image_type = image_type, + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } @@ -1037,24 +1055,23 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { images.reserve(info.image_descriptors.size()); for (const ImageDescriptor& desc : info.image_descriptors) { if (desc.count != 1) { - throw NotImplementedException("Array of textures"); + throw NotImplementedException("Array of images"); } const Id image_type{ImageType(*this, desc)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("img{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - for (u32 index = 0; index < desc.count; ++index) { - images.push_back(ImageDefinition{ - .id{id}, - .image_type{image_type}, - }); - } + Name(id, NameOf(desc, "img")); + images.push_back({ + .id = id, + .image_type = image_type, + .count = desc.count, + }); if (profile.supported_spirv >= 0x00010400) { interfaces.push_back(id); } - binding += desc.count; + ++binding; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index a4503c7ab..c52544fb7 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -32,17 +32,26 @@ private: struct TextureDefinition { Id id; Id sampled_type; + Id pointer_type; Id image_type; + u32 count; +}; + +struct TextureBufferDefinition { + Id id; + u32 count; }; struct ImageBufferDefinition { Id id; Id image_type; + u32 count; }; struct ImageDefinition { Id id; Id image_type; + u32 count; }; struct UniformDefinitions { @@ -162,7 +171,7 @@ public: std::array cbufs{}; std::array ssbos{}; - std::vector texture_buffers; + std::vector texture_buffers; std::vector image_buffers; std::vector textures; std::vector images; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 90817f161..6008980af 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -147,24 +147,31 @@ private: spv::ImageOperandsMask mask{}; }; -Id Texture(EmitContext& ctx, const IR::Value& index) { - if (index.IsImmediate()) { - const TextureDefinition def{ctx.textures.at(index.U32())}; +Id Texture(EmitContext& ctx, IR::TextureInstInfo info, [[maybe_unused]] const IR::Value& index) { + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))}; + return ctx.OpLoad(def.sampled_type, pointer); + } else { return ctx.OpLoad(def.sampled_type, def.id); } - throw NotImplementedException("Indirect texture sample"); } -Id TextureImage(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { - if (!index.IsImmediate()) { - throw NotImplementedException("Indirect texture sample"); - } +Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, + [[maybe_unused]] const IR::Value& index) { if (info.type == TextureType::Buffer) { - const Id sampler_id{ctx.texture_buffers.at(index.U32())}; + const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } + const Id sampler_id{def.id}; const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)}; return ctx.OpImage(ctx.image_buffer_type, id); } else { - const TextureDefinition def{ctx.textures.at(index.U32())}; + const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; + if (def.count > 1) { + throw NotImplementedException("Indirect texture sample"); + } return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); } } @@ -311,7 +318,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& bias_lc, offset); return Emit(&EmitContext::OpImageSparseSampleImplicitLod, &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, index), coords, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } else { // We can't use implicit lods on non-fragment stages on SPIR-V. Maxwell hardware behaves as // if the lod was explicitly zero. This may change on Turing with implicit compute @@ -320,7 +327,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], - Texture(ctx, index), coords, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } } @@ -329,8 +336,8 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& const auto info{inst->Flags()}; const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), - coords, operands.Mask(), operands.Span()); + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, @@ -340,7 +347,7 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va offset); return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], - Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); } Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, @@ -349,7 +356,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset); return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], - Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); + Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); } Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, @@ -357,15 +364,17 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, - ctx.F32[4], Texture(ctx, index), coords, ctx.Const(info.gather_component), + ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), operands.Mask(), operands.Span()); } Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, const IR::Value& offset, const IR::Value& offset2, Id dref) { + const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, - ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); + ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.Mask(), + operands.Span()); } Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, @@ -376,12 +385,12 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c } const ImageOperands operands(offset, lod, ms); return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], - TextureImage(ctx, index, info), coords, operands.Mask(), operands.Span()); + TextureImage(ctx, info, index), coords, operands.Mask(), operands.Span()); } Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { const auto info{inst->Flags()}; - const Id image{TextureImage(ctx, index, info)}; + const Id image{TextureImage(ctx, info, index)}; const Id zero{ctx.u32_zero_value}; const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; switch (info.type) { @@ -405,9 +414,10 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i throw LogicError("Unspecified image type {}", info.type.Value()); } -Id EmitImageQueryLod(EmitContext& ctx, IR::Inst*, const IR::Value& index, Id coords) { +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { + const auto info{inst->Flags()}; const Id zero{ctx.f32_zero_value}; - const Id sampler{Texture(ctx, index)}; + const Id sampler{Texture(ctx, info, index)}; return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords), zero, zero); } @@ -418,8 +428,8 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset, lod_clamp); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, - &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index), - coords, operands.Mask(), operands.Span()); + &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], + Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); } Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 5d7efa14c..77cda1f8a 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -34,14 +34,15 @@ static_assert(sizeof(FpControl) <= sizeof(u32)); union TextureInstInfo { u32 raw; - BitField<0, 8, TextureType> type; - BitField<8, 1, u32> is_depth; - BitField<9, 1, u32> has_bias; - BitField<10, 1, u32> has_lod_clamp; - BitField<11, 1, u32> relaxed_precision; - BitField<12, 2, u32> gather_component; - BitField<14, 2, u32> num_derivates; - BitField<16, 3, ImageFormat> image_format; + BitField<0, 16, u32> descriptor_index; + BitField<16, 3, TextureType> type; + BitField<19, 1, u32> is_depth; + BitField<20, 1, u32> has_bias; + BitField<21, 1, u32> has_lod_clamp; + BitField<22, 1, u32> relaxed_precision; + BitField<23, 2, u32> gather_component; + BitField<25, 2, u32> num_derivates; + BitField<27, 3, ImageFormat> image_format; }; static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index b6869d4e4..8f32c9e74 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -482,18 +482,18 @@ OPCODE(BoundImageGradient, F32x4, U32, OPCODE(BoundImageRead, U32x4, U32, Opaque, ) OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, ) -OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) -OPCODE(ImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) -OPCODE(ImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) -OPCODE(ImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) -OPCODE(ImageQueryDimensions, U32x4, U32, U32, ) -OPCODE(ImageQueryLod, F32x4, U32, Opaque, ) -OPCODE(ImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) -OPCODE(ImageRead, U32x4, U32, Opaque, ) -OPCODE(ImageWrite, Void, U32, Opaque, U32x4, ) +OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) +OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) +OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) +OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) +OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) +OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) +OPCODE(ImageRead, U32x4, Opaque, Opaque, ) +OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) // Warp operations OPCODE(LaneId, U32, ) diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 5ac485522..cfa6b34b9 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #include @@ -21,6 +22,8 @@ struct ConstBufferAddr { u32 offset; u32 secondary_index; u32 secondary_offset; + IR::U32 dynamic_offset; + u32 count; bool has_secondary; }; @@ -32,6 +35,9 @@ struct TextureInst { using TextureInstVector = boost::container::small_vector; +constexpr u32 DESCRIPTOR_SIZE = 8; +constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast(std::countr_zero(DESCRIPTOR_SIZE)); + IR::Opcode IndexedInstruction(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::BindlessImageSampleImplicitLod: @@ -131,6 +137,9 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { if (lhs->has_secondary || rhs->has_secondary) { return std::nullopt; } + if (lhs->count > 1 || rhs->count > 1) { + return std::nullopt; + } if (lhs->index > rhs->index || lhs->offset > rhs->offset) { std::swap(lhs, rhs); } @@ -139,9 +148,12 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { .offset = lhs->offset, .secondary_index = rhs->index, .secondary_offset = rhs->offset, + .dynamic_offset = {}, + .count = 1, .has_secondary = true, }; } + case IR::Opcode::GetCbufU32x2: case IR::Opcode::GetCbufU32: break; } @@ -152,15 +164,39 @@ std::optional TryGetConstBuffer(const IR::Inst* inst) { // but not supported here at the moment return std::nullopt; } - if (!offset.IsImmediate()) { - // TODO: Support arrays of textures + if (offset.IsImmediate()) { + return ConstBufferAddr{ + .index = index.U32(), + .offset = offset.U32(), + .secondary_index = 0, + .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, + .has_secondary = false, + }; + } + IR::Inst* const offset_inst{offset.InstRecursive()}; + if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) { + return std::nullopt; + } + u32 base_offset{}; + IR::U32 dynamic_offset; + if (offset_inst->Arg(0).IsImmediate()) { + base_offset = offset_inst->Arg(0).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(1)}; + } else if (offset_inst->Arg(1).IsImmediate()) { + base_offset = offset_inst->Arg(1).U32(); + dynamic_offset = IR::U32{offset_inst->Arg(0)}; + } else { return std::nullopt; } return ConstBufferAddr{ - .index{index.U32()}, - .offset{offset.U32()}, + .index = index.U32(), + .offset = base_offset, .secondary_index = 0, .secondary_offset = 0, + .dynamic_offset = dynamic_offset, + .count = 8, .has_secondary = false, }; } @@ -179,11 +215,13 @@ TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { .offset = inst.Arg(0).U32(), .secondary_index = 0, .secondary_offset = 0, + .dynamic_offset = {}, + .count = 1, .has_secondary = false, }; } return TextureInst{ - .cbuf{addr}, + .cbuf = addr, .inst = &inst, .block = block, }; @@ -209,18 +247,20 @@ public: u32 Add(const TextureBufferDescriptor& desc) { return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { - return desc.has_secondary == existing.has_secondary && - desc.cbuf_index == existing.cbuf_index && + return desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.secondary_cbuf_index == existing.secondary_cbuf_index && - desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift && + desc.has_secondary == existing.has_secondary; }); } u32 Add(const ImageBufferDescriptor& desc) { return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset; + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; }); } @@ -231,7 +271,8 @@ public: desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.secondary_cbuf_index == existing.secondary_cbuf_index && - desc.secondary_cbuf_offset == existing.secondary_cbuf_offset; + desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && + desc.count == existing.count && desc.size_shift == existing.size_shift; }); } @@ -239,7 +280,8 @@ public: const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { return desc.type == existing.type && desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && - desc.cbuf_offset == existing.cbuf_offset; + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && + desc.size_shift == existing.size_shift; })}; image_descriptors[index].is_written |= desc.is_written; return index; @@ -310,7 +352,6 @@ void TexturePass(Environment& env, IR::Program& program) { // This happens on Fire Emblem: Three Houses flags.type.Assign(TextureType::Buffer); } - inst->SetFlags(flags); break; default: break; @@ -329,7 +370,8 @@ void TexturePass(Environment& env, IR::Program& program) { .is_written = is_written, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } else { index = descriptors.Add(ImageDescriptor{ @@ -338,7 +380,8 @@ void TexturePass(Environment& env, IR::Program& program) { .is_written = is_written, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } break; @@ -351,7 +394,8 @@ void TexturePass(Environment& env, IR::Program& program) { .cbuf_offset = cbuf.offset, .secondary_cbuf_index = cbuf.secondary_index, .secondary_cbuf_offset = cbuf.secondary_offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } else { index = descriptors.Add(TextureDescriptor{ @@ -362,12 +406,23 @@ void TexturePass(Environment& env, IR::Program& program) { .cbuf_offset = cbuf.offset, .secondary_cbuf_index = cbuf.secondary_index, .secondary_cbuf_offset = cbuf.secondary_offset, - .count = 1, + .count = cbuf.count, + .size_shift = DESCRIPTOR_SIZE_SHIFT, }); } break; } - inst->SetArg(0, IR::Value{index}); + flags.descriptor_index.Assign(index); + inst->SetFlags(flags); + + if (cbuf.count > 1) { + const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)}; + IR::IREmitter ir{*texture_inst.block, insert_point}; + const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))}; + inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift)); + } else { + inst->SetArg(0, IR::Value{}); + } } } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 0f45bdfb6..0f28ae07b 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -67,6 +67,7 @@ struct TextureBufferDescriptor { u32 secondary_cbuf_index; u32 secondary_cbuf_offset; u32 count; + u32 size_shift; }; using TextureBufferDescriptors = boost::container::small_vector; @@ -76,6 +77,7 @@ struct ImageBufferDescriptor { u32 cbuf_index; u32 cbuf_offset; u32 count; + u32 size_shift; }; using ImageBufferDescriptors = boost::container::small_vector; @@ -88,6 +90,7 @@ struct TextureDescriptor { u32 secondary_cbuf_index; u32 secondary_cbuf_offset; u32 count; + u32 size_shift; }; using TextureDescriptors = boost::container::small_vector; @@ -98,6 +101,7 @@ struct ImageDescriptor { u32 cbuf_index; u32 cbuf_offset; u32 count; + u32 size_shift; }; using ImageDescriptors = boost::container::small_vector; diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index aaf9a735e..dd7d2cc0c 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -85,28 +85,30 @@ public: } void Add(const Shader::Info& info, VkShaderStageFlags stage) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors.size()); - Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors.size()); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors); + Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors); + Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors); } private: - void Add(VkDescriptorType type, VkShaderStageFlags stage, size_t num) { + template + void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) { + const size_t num{descriptors.size()}; for (size_t i = 0; i < num; ++i) { bindings.push_back({ .binding = binding, .descriptorType = type, - .descriptorCount = 1, + .descriptorCount = descriptors[i].count, .stageFlags = stage, .pImmutableSamplers = nullptr, }); entries.push_back({ .dstBinding = binding, .dstArrayElement = 0, - .descriptorCount = 1, + .descriptorCount = descriptors[i].count, .descriptorType = type, .offset = offset, .stride = sizeof(DescriptorUpdateEntry), @@ -126,21 +128,29 @@ private: inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, const ImageId*& image_view_ids, TextureCache& texture_cache, VKUpdateDescriptorQueue& update_descriptor_queue) { - image_view_ids += info.texture_buffer_descriptors.size(); - image_view_ids += info.image_buffer_descriptors.size(); + for (const auto& desc : info.texture_buffer_descriptors) { + image_view_ids += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + image_view_ids += desc.count; + } for (const auto& desc : info.texture_descriptors) { - const VkSampler sampler{*(samplers++)}; - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - const VkImageView vk_image_view{image_view.Handle(desc.type)}; - update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + for (u32 index = 0; index < desc.count; ++index) { + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + const VkImageView vk_image_view{image_view.Handle(desc.type)}; + update_descriptor_queue.AddSampledImage(vk_image_view, sampler); + } } for (const auto& desc : info.image_descriptors) { - ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; - if (desc.is_written) { - texture_cache.MarkModification(image_view.image_id); + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; + update_descriptor_queue.AddImage(vk_image_view); } - const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; - update_descriptor_queue.AddImage(vk_image_view); } } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 45d837ca4..6e9f66262 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -91,35 +91,41 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto& qmd{kepler_compute.launch_description}; const auto& cbufs{qmd.const_buffer_config}; const bool via_header_index{qmd.linked_tsc != 0}; - const auto read_handle{[&](const auto& desc) { + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + - desc.secondary_cbuf_offset}; + secondary_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TextureHandle{lhs_raw | rhs_raw, via_header_index}; } } return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); + } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - samplers.push_back(sampler->Handle()); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + samplers.push_back(sampler->Handle()); + } } std::ranges::for_each(info.image_descriptors, add_image); @@ -130,16 +136,18 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - ASSERT(desc.count == 1); - bool is_written{false}; - if constexpr (std::is_same_v) { - is_written = desc.is_written; + for (u32 index = 0; index < desc.count; ++index) { + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written); + ++texture_buffer_ids; + ++index; } - ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); - buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), - image_view.format, is_written); - ++texture_buffer_ids; - ++index; }}; std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); std::ranges::for_each(info.image_buffer_descriptors, add_buffer); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 08f00b9ce..b7688aef9 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -161,23 +161,26 @@ void GraphicsPipeline::Configure(bool is_indexed) { const Shader::Info& info{stage_infos[stage]}; buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); - size_t index{}; + size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - desc.is_written); - ++index; + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](const auto& desc) { + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(cbufs[desc.cbuf_index].enabled); - const GPUVAddr addr{cbufs[desc.cbuf_index].address + desc.cbuf_offset}; + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + - desc.secondary_cbuf_offset}; + second_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; @@ -187,17 +190,21 @@ void GraphicsPipeline::Configure(bool is_indexed) { return TextureHandle{gpu_memory.Read(addr), via_header_index}; }}; const auto add_image{[&](const auto& desc) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); + } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { - const TextureHandle handle{read_handle(desc)}; - image_view_indices.push_back(handle.image); + for (u32 index = 0; index < desc.count; ++index) { + const TextureHandle handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.image); - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers.push_back(sampler->Handle()); + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + samplers.push_back(sampler->Handle()); + } } std::ranges::for_each(info.image_descriptors, add_image); } @@ -208,24 +215,30 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { size_t index{}; const auto add_buffer{[&](const auto& desc) { - ASSERT(desc.count == 1); - bool is_written{false}; - if constexpr (std::is_same_v) { - is_written = desc.is_written; + for (u32 index = 0; index < desc.count; ++index) { + bool is_written{false}; + if constexpr (std::is_same_v) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written); + ++index; + ++texture_buffer_index; } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; - buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format, - is_written); - ++index; - ++texture_buffer_index; }}; const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsTextureBuffers(stage); std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); std::ranges::for_each(info.image_buffer_descriptors, add_buffer); - texture_buffer_index += info.texture_descriptors.size(); - texture_buffer_index += info.image_descriptors.size(); + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } } buffer_cache.UpdateGraphicsBuffers(is_indexed); -- cgit v1.2.3 From 0ace34575cd099fb0db955ab32c215106ef19f84 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 16:19:14 -0300 Subject: shader: Require dual source blending --- src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 038231298..9c609e504 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -219,7 +219,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .geometryShader = true, .tessellationShader = true, .sampleRateShading = true, - .dualSrcBlend = false, + .dualSrcBlend = true, .logicOp = false, .multiDrawIndirect = false, .drawIndirectFirstInstance = false, @@ -678,6 +678,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.sampleRateShading, "sampleRateShading"), + std::make_pair(features.dualSrcBlend, "dualSrcBlend"), std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), -- cgit v1.2.3 From 7a1f296cda32bdb8996f25fd1862b422ac2bfe48 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 22 Apr 2021 21:05:10 -0300 Subject: shader: Fix render targets with null attachments --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 18 +++++++--- .../renderer_vulkan/vk_render_pass_cache.cpp | 42 +++++++++++----------- 2 files changed, 34 insertions(+), 26 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b7688aef9..e43db280f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -105,6 +105,17 @@ RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); return key; } + +size_t NumAttachments(const FixedPipelineState& state) { + size_t num{}; + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + const auto format{static_cast(state.color_formats[index])}; + if (format != Tegra::RenderTargetFormat::NONE) { + num = index + 1; + } + } + return num; +} } // Anonymous namespace GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, @@ -418,17 +429,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .maxDepthBounds = 0.0f, }; static_vector cb_attachments; - for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + const size_t num_attachments{NumAttachments(state)}; + for (size_t index = 0; index < num_attachments; ++index) { static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto format{static_cast(state.color_formats[index])}; - if (format == Tegra::RenderTargetFormat::NONE) { - continue; - } const auto& blend{state.attachments[index]}; const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 991afe521..451ffe019 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -16,18 +16,6 @@ namespace Vulkan { namespace { using VideoCore::Surface::PixelFormat; -constexpr std::array ATTACHMENT_REFERENCES{ - VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, - VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, -}; - VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, VkSampleCountFlagBits samples) { using MaxwellToVK::SurfaceFormat; @@ -54,17 +42,29 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { return *pair->second; } boost::container::static_vector descriptions; + std::array references{}; + u32 num_attachments{}; + u32 num_colors{}; for (size_t index = 0; index < key.color_formats.size(); ++index) { const PixelFormat format{key.color_formats[index]}; - if (format == PixelFormat::Invalid) { - continue; + const bool is_valid{format != PixelFormat::Invalid}; + references[index] = VkAttachmentReference{ + .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; + if (is_valid) { + descriptions.push_back(AttachmentDescription(*device, format, key.samples)); + num_attachments = static_cast(index + 1); + ++num_colors; } - descriptions.push_back(AttachmentDescription(*device, format, key.samples)); } - const size_t num_colors{descriptions.size()}; - const VkAttachmentReference* depth_attachment{}; + const bool has_depth{key.depth_format != PixelFormat::Invalid}; + VkAttachmentReference depth_reference{}; if (key.depth_format != PixelFormat::Invalid) { - depth_attachment = &ATTACHMENT_REFERENCES[num_colors]; + depth_reference = VkAttachmentReference{ + .attachment = num_colors, + .layout = VK_IMAGE_LAYOUT_GENERAL, + }; descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); } const VkSubpassDescription subpass{ @@ -72,10 +72,10 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .inputAttachmentCount = 0, .pInputAttachments = nullptr, - .colorAttachmentCount = static_cast(num_colors), - .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, + .colorAttachmentCount = num_attachments, + .pColorAttachments = references.data(), .pResolveAttachments = nullptr, - .pDepthStencilAttachment = depth_attachment, + .pDepthStencilAttachment = has_depth ? &depth_reference : nullptr, .preserveAttachmentCount = 0, .pPreserveAttachments = nullptr, }; -- cgit v1.2.3 From 2dc86372c76afb134651499452bb5074b6d1e839 Mon Sep 17 00:00:00 2001 From: Rodrigo Locatti Date: Fri, 23 Apr 2021 02:38:02 -0300 Subject: shader: Fix bugs and build issues on GCC --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 4 ++-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6e9f66262..6611c1de3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -95,7 +95,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); const u32 index_offset{index << desc.size_shift}; const u32 offset{desc.cbuf_offset + index_offset}; - const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + desc.cbuf_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; if constexpr (std::is_same_v || std::is_same_v) { if (desc.has_secondary) { @@ -136,7 +136,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { + for (u32 i = 0; index < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { is_written = desc.is_written; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e43db280f..a8b402253 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -226,7 +226,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { + for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4415558bb..ebe073293 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -4,10 +4,10 @@ #pragma once +#include #include #include #include -#include #include #include "common/common_types.h" -- cgit v1.2.3 From 5b1b06f11e4520ec9d0b7864dc822daea3e3be0c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 07:33:21 -0300 Subject: vk_graphics_pipeline: Guard against non-tessellation pipelines using patches --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index a8b402253..2bc1f67ae 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -345,12 +345,18 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { + if (!spv_modules[1] && !spv_modules[2]) { + LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); + input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + } + } const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), + .topology = input_assembly_topology, .primitiveRestartEnable = state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; -- cgit v1.2.3 From 0c0ee9d8973abd7d1649df7a6b6f57b3a5570dfe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 07:39:00 -0300 Subject: vulkan_device: Require shaderClipDistance and shaderCullDistance features --- src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9c609e504..2318c1bda 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -249,8 +249,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderSampledImageArrayDynamicIndexing = false, .shaderStorageBufferArrayDynamicIndexing = false, .shaderStorageImageArrayDynamicIndexing = false, - .shaderClipDistance = false, - .shaderCullDistance = false, + .shaderClipDistance = true, + .shaderCullDistance = true, .shaderFloat64 = true, .shaderInt64 = true, .shaderInt16 = true, @@ -684,6 +684,8 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), std::make_pair(features.shaderStorageImageWriteWithoutFormat, "shaderStorageImageWriteWithoutFormat"), + std::make_pair(features.shaderClipDistance, "shaderClipDistance"), + std::make_pair(features.shaderCullDistance, "shaderCullDistance"), std::make_pair(demote.shaderDemoteToHelperInvocation, "shaderDemoteToHelperInvocation"), std::make_pair(variable_pointers.variablePointers, "variablePointers"), std::make_pair(variable_pointers.variablePointersStorageBuffer, -- cgit v1.2.3 From 8fda599a316b7b3a5e017cb01db1e9c021ce7654 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 23 Apr 2021 21:24:30 -0300 Subject: vk_compute_pipeline: Fix index comparison oversight on compute texture buffers --- src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 6611c1de3..990ead575 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -136,7 +136,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { - for (u32 i = 0; index < desc.count; ++i) { + for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; if constexpr (std::is_same_v) { is_written = desc.is_written; -- cgit v1.2.3 From f4ace63957ee47c4e3e913954f07375d0391beae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 24 Apr 2021 18:27:25 -0300 Subject: shader: Accelerate pipeline transitions and use dirty flags for shaders --- src/video_core/dirty_flags.cpp | 6 +++ src/video_core/dirty_flags.h | 2 + src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_state_tracker.cpp | 6 --- src/video_core/renderer_opengl/gl_state_tracker.h | 1 - .../renderer_vulkan/vk_graphics_pipeline.cpp | 46 +++++++++--------- .../renderer_vulkan/vk_graphics_pipeline.h | 54 +++++++++++++++++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 31 ++++++++++--- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 ++---------- 9 files changed, 114 insertions(+), 64 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 7149af290..b1be065c3 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) { FillBlock(table, OFF(zeta), NUM(zeta), flag); } } + +void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) { + FillBlock(tables[0], OFF(shader_config[0]), + NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders); +} } // Anonymous namespace void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { @@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { SetupIndexBuffer(tables); SetupDirtyDescriptors(tables); SetupDirtyRenderTargets(tables); + SetupDirtyShaders(tables); } } // namespace VideoCommon::Dirty diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 702688ace..504465d3f 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -36,6 +36,8 @@ enum : u8 { IndexBuffer, + Shaders, + LastCommonEntry, }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3551dbdcc..dd1937863 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -635,7 +635,7 @@ void RasterizerOpenGL::SyncDepthClamp() { void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { + if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) { return; } flags[Dirty::ClipDistances] = false; diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index dbdf5230f..586da84e3 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) { FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); } -void SetupDirtyShaders(Tables& tables) { - FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram, - Shaders); -} - void SetupDirtyPolygonModes(Tables& tables) { tables[0][OFF(polygon_mode_front)] = PolygonModeFront; tables[0][OFF(polygon_mode_back)] = PolygonModeBack; @@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} SetupDirtyScissors(tables); SetupDirtyVertexInstances(tables); SetupDirtyVertexFormat(tables); - SetupDirtyShaders(tables); SetupDirtyPolygonModes(tables); SetupDirtyDepthTest(tables); SetupDirtyStencilTest(tables); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 94c905116..5864c7c07 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -52,7 +52,6 @@ enum : u8 { BlendState0, BlendState7 = BlendState0 + 7, - Shaders, ClipDistances, PolygonModes, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2bc1f67ae..100a5e07a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -125,13 +125,12 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, - const FixedPipelineState& state_, + const GraphicsPipelineCacheKey& key_, std::array stages, const std::array& infos) - : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, - update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{ - std::move(stages)} { + update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); @@ -144,7 +143,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, pipeline_layout = builder.CreatePipelineLayout(set_layout); descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; MakePipeline(device, render_pass); std::lock_guard lock{build_mutex}; @@ -158,6 +157,11 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, } } +void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { + transition_keys.push_back(transition->key); + transitions.push_back(transition); +} + void GraphicsPipeline::Configure(bool is_indexed) { static constexpr size_t max_images_elements = 64; std::array image_view_ids; @@ -294,12 +298,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { - dynamic = state.dynamic_state; + dynamic = key.state.dynamic_state; } static_vector vertex_bindings; static_vector vertex_binding_divisors; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; + const bool instanced = key.state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; vertex_bindings.push_back({ .binding = static_cast(index), @@ -309,14 +313,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (instanced) { vertex_binding_divisors.push_back({ .binding = static_cast(index), - .divisor = state.binding_divisors[index], + .divisor = key.state.binding_divisors[index], }); } } static_vector vertex_attributes; const auto& input_attributes = stage_infos[0].input_generics; - for (size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; if (!attribute.enabled || !input_attributes[index].used) { continue; } @@ -345,7 +349,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology); if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { if (!spv_modules[1] && !spv_modules[2]) { LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); @@ -357,14 +361,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .topology = input_assembly_topology, - .primitiveRestartEnable = state.primitive_restart_enable != 0 && + .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; const VkPipelineTessellationStateCreateInfo tessellation_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; VkPipelineViewportStateCreateInfo viewport_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, @@ -376,7 +380,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pScissors = nullptr, }; std::array swizzles; - std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, @@ -393,15 +397,15 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .depthClampEnable = - static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = - static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), .polygonMode = - MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)), + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)), .cullMode = static_cast( dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, + .depthBiasEnable = key.state.depth_bias_enable, .depthBiasConstantFactor = 0.0f, .depthBiasClamp = 0.0f, .depthBiasSlopeFactor = 0.0f, @@ -411,7 +415,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode), .sampleShadingEnable = VK_FALSE, .minSampleShading = 0.0f, .pSampleMask = nullptr, @@ -435,7 +439,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .maxDepthBounds = 0.0f, }; static_vector cb_attachments; - const size_t num_attachments{NumAttachments(state)}; + const size_t num_attachments{NumAttachments(key.state)}; for (size_t index = 0; index < num_attachments; ++index) { static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, @@ -443,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto& blend{state.attachments[index]}; + const auto& blend{key.state.attachments[index]}; const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; for (size_t i = 0; i < mask_table.size(); ++i) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 7d14d2378..fd787840b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -4,10 +4,12 @@ #pragma once +#include #include #include #include #include +#include #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" @@ -20,6 +22,39 @@ namespace Vulkan { +struct GraphicsPipelineCacheKey { + std::array unique_hashes; + FixedPipelineState state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std + +namespace Vulkan { + class Device; class RenderPassCache; class VKScheduler; @@ -35,7 +70,8 @@ public: const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, const FixedPipelineState& state, + RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array stages, const std::array& infos); @@ -47,16 +83,30 @@ public: GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; + void AddTransition(GraphicsPipeline* transition); + + GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + if (key == current_key) { + return this; + } + const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)}; + return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)] + : nullptr; + } + private: void MakePipeline(const Device& device, VkRenderPass render_pass); + const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; TextureCache& texture_cache; BufferCache& buffer_cache; VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; - const FixedPipelineState state; + + std::vector transition_keys; + std::vector transitions; std::array spv_modules; std::array stage_infos; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4317b2ac7..2bd870060 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -21,6 +21,7 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/program_header.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -700,17 +701,28 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (!RefreshStages()) { + current_pipeline = nullptr; return nullptr; } graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + if (current_pipeline) { + GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; + if (next) { + current_pipeline = next; + return current_pipeline; + } + } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; - if (!is_new) { - return pipeline.get(); + if (is_new) { + pipeline = CreateGraphicsPipeline(); } - pipeline = CreateGraphicsPipeline(); - return pipeline.get(); + if (current_pipeline) { + current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return current_pipeline; } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -743,6 +755,12 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { } bool PipelineCache::RefreshStages() { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_valid_shaders; + } + dirty[VideoCommon::Dirty::Shaders] = false; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { @@ -755,6 +773,7 @@ bool PipelineCache::RefreshStages() { const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; if (!cpu_shader_addr) { LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); + last_valid_shaders = false; return false; } const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; @@ -766,6 +785,7 @@ bool PipelineCache::RefreshStages() { shader_infos[index] = shader_info; graphics_key.unique_hashes[index] = shader_info->unique_hash; } + last_valid_shaders = true; return true; } @@ -832,8 +852,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; return std::make_unique( maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, - update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules), - infos); + update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e12e4422f..ad569acc4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -58,26 +58,6 @@ static_assert(std::has_unique_object_representations_v) static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); -struct GraphicsPipelineCacheKey { - std::array unique_hashes; - FixedPipelineState state; - - size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - size_t Size() const noexcept { - return sizeof(unique_hashes) + state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - } // namespace Vulkan namespace std { @@ -89,13 +69,6 @@ struct hash { } }; -template <> -struct hash { - size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - } // namespace std namespace Vulkan { @@ -181,7 +154,10 @@ private: TextureCache& texture_cache; GraphicsPipelineCacheKey graphics_key{}; + GraphicsPipeline* current_pipeline{}; + std::array shader_infos{}; + bool last_valid_shaders{}; std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3 From 5ed871398b0e89cb3f2e3eb740d431f5faaa12e4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 24 Apr 2021 18:28:02 -0300 Subject: vk_graphics_pipeline: Generate specialized pipeline config functions and improve code --- .../renderer_vulkan/vk_graphics_pipeline.cpp | 239 ++++++++++++++++++--- .../renderer_vulkan/vk_graphics_pipeline.h | 22 +- 2 files changed, 230 insertions(+), 31 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 100a5e07a..674226cb7 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,14 +19,24 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" +#ifdef _MSC_VER +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif + namespace Vulkan { namespace { using boost::container::small_vector; using boost::container::static_vector; +using Shader::ImageBufferDescriptor; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; +constexpr size_t MAX_IMAGE_ELEMENTS = 64; + DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { DescriptorLayoutBuilder builder{device.GetLogical()}; for (size_t index = 0; index < infos.size(); ++index) { @@ -116,6 +126,80 @@ size_t NumAttachments(const FixedPipelineState& state) { } return num; } + +template +bool Passes(const std::array& modules, + const std::array& stage_infos) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (!Spec::enabled_stages[stage] && modules[stage]) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if (!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; +} + +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); + +template +ConfigureFuncPtr FindSpec(const std::array& modules, + const std::array& stage_infos) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes(modules, stage_infos)) { + return FindSpec(modules, stage_infos); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc(); +} + +struct SimpleVertexFragmentSpec { + static constexpr std::array enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +ConfigureFuncPtr ConfigureFunc(const std::array& modules, + const std::array& infos) { + return FindSpec(modules, infos); +} } // Anonymous namespace GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, @@ -144,6 +228,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; + Validate(); MakePipeline(device, render_pass); std::lock_guard lock{build_mutex}; @@ -155,6 +240,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, } else { func(); } + configure_func = ConfigureFunc(spv_modules, stage_infos); } void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { @@ -162,26 +248,29 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { transitions.push_back(transition); } -void GraphicsPipeline::Configure(bool is_indexed) { - static constexpr size_t max_images_elements = 64; - std::array image_view_ids; - static_vector image_view_indices; - static_vector samplers; +template +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_index{}; texture_cache.SynchronizeGraphicsDescriptors(); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); - size_t ssbo_index{}; - for (const auto& desc : info.storage_buffers_descriptors) { - ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, - desc.cbuf_offset, desc.is_written); - ++ssbo_index; + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } } const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; const auto read_handle{[&](const auto& desc, u32 index) { @@ -207,33 +296,60 @@ void GraphicsPipeline::Configure(bool is_indexed) { const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + image_view_indices[image_index++] = handle.image; } }}; - std::ranges::for_each(info.texture_buffer_descriptors, add_image); - std::ranges::for_each(info.image_buffer_descriptors, add_image); + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_image(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + image_view_indices[image_index] = handle.image; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers.push_back(sampler->Handle()); + samplers[image_index] = sampler->Handle(); + ++image_index; + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); } } - std::ranges::for_each(info.image_descriptors, add_image); + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_index); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); ImageId* texture_buffer_index{image_view_ids.data()}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; @@ -245,29 +361,75 @@ void GraphicsPipeline::Configure(bool is_indexed) { } }}; const Shader::Info& info{stage_infos[stage]}; - buffer_cache.UnbindGraphicsTextureBuffers(stage); - std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); - std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + if constexpr (Spec::has_texture_buffers || Spec::has_image_buffers) { + buffer_cache.UnbindGraphicsTextureBuffers(stage); + } + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } for (const auto& desc : info.texture_descriptors) { texture_buffer_index += desc.count; } - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); } - buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); update_descriptor_queue.Acquire(); const VkSampler* samplers_it{samplers.data()}; const ImageId* views_it{image_view_ids.data()}; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, update_descriptor_queue); + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + ConfigureDraw(); +} + +void GraphicsPipeline::ConfigureDraw() { texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); @@ -550,4 +712,23 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa }); } +void GraphicsPipeline::Validate() { + size_t num_images{}; + for (const auto& info : stage_infos) { + for (const auto& desc : info.texture_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_images += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_images <= MAX_IMAGE_ELEMENTS); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index fd787840b..edab5703f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -75,8 +75,6 @@ public: std::array stages, const std::array& infos); - void Configure(bool is_indexed); - GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; @@ -85,6 +83,10 @@ public: void AddTransition(GraphicsPipeline* transition); + void Configure(bool is_indexed) { + configure_func(this, is_indexed); + } + GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { if (key == current_key) { return this; @@ -94,9 +96,23 @@ public: : nullptr; } + template + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pipeline, bool is_indexed) { + pipeline->ConfigureImpl(is_indexed); + }; + } + private: + template + void ConfigureImpl(bool is_indexed); + + void ConfigureDraw(); + void MakePipeline(const Device& device, VkRenderPass render_pass); + void Validate(); + const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; @@ -105,6 +121,8 @@ private: VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; + void (*configure_func)(GraphicsPipeline*, bool); + std::vector transition_keys; std::vector transitions; -- cgit v1.2.3 From 2f3c3dfc10a318f63862c4976f0608ea50c19387 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Apr 2021 00:15:32 -0300 Subject: vulkan: Rework descriptor allocation algorithm Create multiple descriptor pools on demand. There are some degrees of freedom what is considered a compatible pool to avoid wasting large pools on small descriptors. --- src/video_core/renderer_vulkan/blit_image.cpp | 19 +- src/video_core/renderer_vulkan/blit_image.h | 2 +- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_buffer_cache.h | 4 +- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 200 +++++++++++---------- src/video_core/renderer_vulkan/vk_compute_pass.h | 28 +-- .../renderer_vulkan/vk_compute_pipeline.cpp | 4 +- .../renderer_vulkan/vk_compute_pipeline.h | 2 +- .../renderer_vulkan/vk_descriptor_pool.cpp | 172 +++++++++++++----- .../renderer_vulkan/vk_descriptor_pool.h | 62 +++++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 +- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 6 +- src/video_core/renderer_vulkan/vk_rasterizer.h | 2 +- 15 files changed, 314 insertions(+), 197 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 39fe9289b..4058f62cd 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA .bindingCount = 1, .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, }; +template +inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 0, + .texture_buffers = 0, + .image_buffers = 0, + .textures = num_textures, + .images = 0, + .score = 2, +}; constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -326,14 +336,16 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi } // Anonymous namespace BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, - StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) + StateTracker& state_tracker_, DescriptorPool& descriptor_pool) : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), - one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), - two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), + one_texture_descriptor_allocator{ + descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, + two_textures_descriptor_allocator{ + descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( PipelineLayoutCreateInfo(one_texture_set_layout.address()))), two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( @@ -415,7 +427,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { - ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); } diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 0d81a06ed..33ee095c1 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -31,7 +31,7 @@ struct BlitImagePipelineKey { class BlitImageHelper { public: explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, - StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); + StateTracker& state_tracker, DescriptorPool& descriptor_pool); ~BlitImageHelper(); void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index cdda56ab1..568993c58 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -116,7 +116,7 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool) + DescriptorPool& descriptor_pool) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index ea17406dc..c52001b5a 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -16,7 +16,7 @@ namespace Vulkan { class Device; -class VKDescriptorPool; +class DescriptorPool; class VKScheduler; class BufferCacheRuntime; @@ -61,7 +61,7 @@ public: explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, VKScheduler& scheduler_, StagingBufferPool& staging_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, - VKDescriptorPool& descriptor_pool); + DescriptorPool& descriptor_pool); void Finish(); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index ec9866605..e2f3d16bf 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; constexpr size_t ASTC_NUM_BINDINGS = 4; -VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { - return { - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .offset = 0, - .size = static_cast(size), - }; -} - -std::array BuildInputOutputDescriptorSetBindings() { - return {{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} +template +inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast(size), +}; -std::array BuildASTCDescriptorSetBindings() { - return {{ - { - .binding = ASTC_BINDING_INPUT_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_ENC_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_SWIZZLE_BUFFER, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = ASTC_BINDING_OUTPUT_IMAGE, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = nullptr, - }, - }}; -} +constexpr std::array INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; + +constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 2, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 0, + .score = 2, +}; -VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { - return { - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 2, +constexpr std::array ASTC_DESCRIPTOR_SET_BINDINGS{{ + { + .binding = ASTC_BINDING_INPUT_BUFFER, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .offset = 0, - .stride = sizeof(DescriptorUpdateEntry), - }; -} + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_ENC_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_SWIZZLE_BUFFER, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = ASTC_BINDING_OUTPUT_IMAGE, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, +}}; + +constexpr DescriptorBankInfo ASTC_BANK_INFO{ + .uniform_buffers = 0, + .storage_buffers = 3, + .texture_buffers = 0, + .image_buffers = 0, + .textures = 0, + .images = 1, + .score = 4, +}; + +constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), +}; -std::array -BuildASTCPassDescriptorUpdateTemplateEntry() { - return {{ +constexpr std::array + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{ { .dstBinding = ASTC_BINDING_INPUT_BUFFER, .dstArrayElement = 0, @@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() { .stride = sizeof(DescriptorUpdateEntry), }, }}; -} struct AstcPushConstants { std::array blocks_dims; @@ -159,14 +170,13 @@ struct AstcPushConstants { u32 block_height; u32 block_height_mask; }; - } // Anonymous namespace -VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span bindings, - vk::Span templates, - vk::Span push_constants, - std::span code) { +ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, + vk::Span bindings, + vk::Span templates, + const DescriptorBankInfo& bank_info, + vk::Span push_constants, std::span code) { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -196,8 +206,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .pipelineLayout = *layout, .set = 0, }); - - descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info); } module = device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, @@ -226,23 +235,23 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ }); } -VKComputePass::~VKComputePass() = default; +ComputePass::~ComputePass() = default; -VkDescriptorSet VKComputePass::CommitDescriptorSet( - VKUpdateDescriptorQueue& update_descriptor_queue) { +VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) { if (!descriptor_template) { return nullptr; } - const VkDescriptorSet set = descriptor_allocator->Commit(); + const VkDescriptorSet set = descriptor_allocator.Commit(); update_descriptor_queue.Send(descriptor_template.address(), set); return set; } -Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, + StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), + : ComputePass(device, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, + VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -277,12 +286,12 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer } QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), - BuildInputOutputDescriptorUpdateTemplate(), - BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), + : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE, VULKAN_QUAD_INDEXED_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_} {} @@ -337,14 +346,13 @@ std::pair QuadIndexedPass::Assemble( } ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_) - : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), - BuildASTCPassDescriptorUpdateTemplateEntry(), - BuildComputePushConstantRange(sizeof(AstcPushConstants)), - ASTC_DECODER_COMP_SPV), + : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, + ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE, ASTC_DECODER_COMP_SPV), device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 5ea187c30..54c1ac4cb 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include @@ -27,13 +26,14 @@ class VKUpdateDescriptorQueue; class Image; struct StagingBufferRef; -class VKComputePass { +class ComputePass { public: - explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, - vk::Span bindings, - vk::Span templates, - vk::Span push_constants, std::span code); - ~VKComputePass(); + explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool, + vk::Span bindings, + vk::Span templates, + const DescriptorBankInfo& bank_info, + vk::Span push_constants, std::span code); + ~ComputePass(); protected: VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); @@ -44,14 +44,14 @@ protected: private: vk::DescriptorSetLayout descriptor_set_layout; - std::optional descriptor_allocator; + DescriptorAllocator descriptor_allocator; vk::ShaderModule module; }; -class Uint8Pass final : public VKComputePass { +class Uint8Pass final : public ComputePass { public: explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~Uint8Pass(); @@ -66,10 +66,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; -class QuadIndexedPass final : public VKComputePass { +class QuadIndexedPass final : public ComputePass { public: explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_); ~QuadIndexedPass(); @@ -84,10 +84,10 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; -class ASTCDecoderPass final : public VKComputePass { +class ASTCDecoderPass final : public ComputePass { public: explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, - VKDescriptorPool& descriptor_pool_, + DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, MemoryAllocator& memory_allocator_); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 990ead575..54a57c358 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,7 +18,7 @@ namespace Vulkan { -ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, +ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, vk::ShaderModule spv_module_) @@ -30,7 +30,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_set_layout = builder.CreateDescriptorSetLayout(); pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); - descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); auto func{[this, &device] { const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 8efdc2926..0d4cd37be 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -25,7 +25,7 @@ class VKScheduler; class ComputePipeline { public: - explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* thread_worker, const Shader::Info& info, vk::ShaderModule spv_module); diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 3bea1ff44..8e77e4796 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include +#include #include #include "common/common_types.h" @@ -13,77 +15,149 @@ namespace Vulkan { -// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. -constexpr std::size_t SETS_GROW_RATE = 0x20; +// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines +constexpr size_t SETS_GROW_RATE = 16; +constexpr s32 SCORE_THRESHOLD = 3; +constexpr u32 SETS_PER_POOL = 64; -DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, - VkDescriptorSetLayout layout_) - : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{&descriptor_pool_}, layout{layout_} {} +struct DescriptorBank { + DescriptorBankInfo info; + std::vector pools; +}; -VkDescriptorSet DescriptorAllocator::Commit() { - const std::size_t index = CommitResource(); - return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; +bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept { + return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers && + texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers && + textures >= subset.textures && images >= subset.image_buffers; } -void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin)); +template +static u32 Accumulate(const Descriptors& descriptors) { + u32 count = 0; + for (const auto& descriptor : descriptors) { + count += descriptor.count; + } + return count; } -VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) - : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ - AllocateNewPool()} {} - -VKDescriptorPool::~VKDescriptorPool() = default; - -vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { - static constexpr u32 num_sets = 0x20000; - static constexpr VkDescriptorPoolSize pool_sizes[] = { - {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, - {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, - {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, - }; +static DescriptorBankInfo MakeBankInfo(std::span infos) { + DescriptorBankInfo bank; + for (const Shader::Info& info : infos) { + bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors); + bank.storage_buffers += Accumulate(info.storage_buffers_descriptors); + bank.texture_buffers += Accumulate(info.texture_buffer_descriptors); + bank.image_buffers += Accumulate(info.image_buffer_descriptors); + bank.textures += Accumulate(info.texture_descriptors); + bank.images += Accumulate(info.image_descriptors); + } + bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers + + bank.image_buffers + bank.textures + bank.images; + return bank; +} - const VkDescriptorPoolCreateInfo ci{ +static void AllocatePool(const Device& device, DescriptorBank& bank) { + std::array pool_sizes; + size_t pool_cursor{}; + const auto add = [&](VkDescriptorType type, u32 count) { + if (count > 0) { + pool_sizes[pool_cursor++] = { + .type = type, + .descriptorCount = count * SETS_PER_POOL, + }; + } + }; + const auto& info{bank.info}; + add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers); + add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers); + add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers); + add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures); + add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images); + bank.pools.push_back(device.GetLogical().CreateDescriptorPool({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, - .maxSets = num_sets, - .poolSizeCount = static_cast(std::size(pool_sizes)), + .maxSets = SETS_PER_POOL, + .poolSizeCount = static_cast(pool_cursor), .pPoolSizes = std::data(pool_sizes), - }; - return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); + })); } -vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, - std::size_t count) { - const std::vector layout_copies(count, layout); - VkDescriptorSetAllocateInfo ai{ +DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_) + : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_}, + layout{layout_} {} + +VkDescriptorSet DescriptorAllocator::Commit() { + const size_t index = CommitResource(); + return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; +} + +void DescriptorAllocator::Allocate(size_t begin, size_t end) { + sets.push_back(AllocateDescriptors(end - begin)); +} + +vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) { + const std::vector layouts(count, layout); + VkDescriptorSetAllocateInfo allocate_info{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .pNext = nullptr, - .descriptorPool = **active_pool, + .descriptorPool = *bank->pools.back(), .descriptorSetCount = static_cast(count), - .pSetLayouts = layout_copies.data(), + .pSetLayouts = layouts.data(), }; - - vk::DescriptorSets sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // Our current pool is out of memory. Allocate a new one and retry - active_pool = AllocateNewPool(); - ai.descriptorPool = **active_pool; - sets = active_pool->Allocate(ai); - if (!sets.IsOutOfPoolMemory()) { - return sets; + AllocatePool(*device, *bank); + allocate_info.descriptorPool = *bank->pools.back(); + new_sets = bank->pools.back().Allocate(allocate_info); + if (!new_sets.IsOutOfPoolMemory()) { + return new_sets; } - // After allocating a new pool, we are out of memory again. We can't handle this from here. throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); } +DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler) + : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {} + +DescriptorPool::~DescriptorPool() = default; + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + std::span infos) { + return Allocator(layout, MakeBankInfo(infos)); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const Shader::Info& info) { + return Allocator(layout, MakeBankInfo(std::array{info})); +} + +DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, + const DescriptorBankInfo& info) { + return DescriptorAllocator(device, master_semaphore, Bank(info), layout); +} + +DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) { + std::shared_lock read_lock{banks_mutex}; + const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) { + return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs); + }); + if (it != bank_infos.end()) { + return *banks[std::distance(bank_infos.begin(), it)].get(); + } + read_lock.unlock(); + + std::unique_lock write_lock{banks_mutex}; + bank_infos.push_back(reqs); + + auto& bank = *banks.emplace_back(std::make_unique()); + bank.info = reqs; + AllocatePool(device, bank); + return bank; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index 2501f9967..59466aac5 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -4,21 +4,38 @@ #pragma once +#include +#include #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKDescriptorPool; class VKScheduler; +struct DescriptorBank; + +struct DescriptorBankInfo { + [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept; + + u32 uniform_buffers{}; ///< Number of uniform buffer descriptors + u32 storage_buffers{}; ///< Number of storage buffer descriptors + u32 texture_buffers{}; ///< Number of texture buffer descriptors + u32 image_buffers{}; ///< Number of image buffer descriptors + u32 textures{}; ///< Number of texture descriptors + u32 images{}; ///< Number of image descriptors + s32 score{}; ///< Number of descriptors in total +}; + class DescriptorAllocator final : public ResourcePool { + friend class DescriptorPool; + public: explicit DescriptorAllocator() = default; - explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); ~DescriptorAllocator() override = default; DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; @@ -29,36 +46,43 @@ public: VkDescriptorSet Commit(); -protected: - void Allocate(std::size_t begin, std::size_t end) override; - private: - VKDescriptorPool* descriptor_pool{}; + explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, + DescriptorBank& bank_, VkDescriptorSetLayout layout_); + + void Allocate(size_t begin, size_t end) override; + + vk::DescriptorSets AllocateDescriptors(size_t count); + + const Device* device{}; + DescriptorBank* bank{}; VkDescriptorSetLayout layout{}; - std::vector descriptors_allocations; + std::vector sets; }; -class VKDescriptorPool final { - friend DescriptorAllocator; - +class DescriptorPool { public: - explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); - ~VKDescriptorPool(); + explicit DescriptorPool(const Device& device, VKScheduler& scheduler); + ~DescriptorPool(); - VKDescriptorPool(const VKDescriptorPool&) = delete; - VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; + DescriptorPool& operator=(const DescriptorPool&) = delete; + DescriptorPool(const DescriptorPool&) = delete; -private: - vk::DescriptorPool* AllocateNewPool(); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, + std::span infos); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info); + DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info); - vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); +private: + DescriptorBank& Bank(const DescriptorBankInfo& reqs); const Device& device; MasterSemaphore& master_semaphore; - std::vector pools; - vk::DescriptorPool* active_pool; + std::shared_mutex banks_mutex; + std::vector bank_infos; + std::vector> banks; }; } // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 674226cb7..0526c197a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -205,7 +205,7 @@ ConfigureFuncPtr ConfigureFunc(const std::array& m GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device, VKDescriptorPool& descriptor_pool, + const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, @@ -220,7 +220,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); auto func{[this, &device, &render_pass_cache, builder] { const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index edab5703f..454fc049e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -67,7 +67,7 @@ public: explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, - const Device& device, VKDescriptorPool& descriptor_pool, + const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2bd870060..9d9729022 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -647,7 +647,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, - VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index ad569acc4..eec17d3fd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -75,10 +75,10 @@ namespace Vulkan { class ComputePipeline; class Device; +class DescriptorPool; class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; -class VKDescriptorPool; class VKScheduler; class VKUpdateDescriptorQueue; @@ -105,7 +105,7 @@ public: Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, - VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, + VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache); @@ -147,7 +147,7 @@ private: const Device& device; VKScheduler& scheduler; - VKDescriptorPool& descriptor_pool; + DescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; RenderPassCache& render_pass_cache; BufferCache& buffer_cache; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2f1551e65..1302bed02 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -147,7 +147,7 @@ private: VKScheduler& scheduler; StagingBufferPool staging_pool; - VKDescriptorPool descriptor_pool; + DescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; -- cgit v1.2.3 From ac8835659ead30d289ff8b907a2295d87790670f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 25 Apr 2021 01:04:49 -0300 Subject: vulkan: Defer descriptor set work to the Vulkan thread Move descriptor lookup and update code to a separate thread. Delaying this removes work from the main GPU thread and allows creating descriptor layouts on another thread. This reduces a bit the workload of the main thread when new pipelines are encountered. --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 45 ++++++++++------------ src/video_core/renderer_vulkan/vk_compute_pass.h | 8 ++-- .../renderer_vulkan/vk_compute_pipeline.cpp | 36 +++++++++-------- .../renderer_vulkan/vk_compute_pipeline.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 40 +++++++++---------- .../renderer_vulkan/vk_graphics_pipeline.h | 5 ++- .../renderer_vulkan/vk_update_descriptor.cpp | 9 ----- .../renderer_vulkan/vk_update_descriptor.h | 4 +- 8 files changed, 69 insertions(+), 79 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index e2f3d16bf..7e5ba283b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -172,11 +172,12 @@ struct AstcPushConstants { }; } // Anonymous namespace -ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, +ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, vk::Span bindings, vk::Span templates, const DescriptorBankInfo& bank_info, - vk::Span push_constants, std::span code) { + vk::Span push_constants, std::span code) + : device{device_} { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, @@ -237,15 +238,6 @@ ComputePass::ComputePass(const Device& device, DescriptorPool& descriptor_pool, ComputePass::~ComputePass() = default; -VkDescriptorSet ComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue) { - if (!descriptor_template) { - return nullptr; - } - const VkDescriptorSet set = descriptor_allocator.Commit(); - update_descriptor_queue.Send(descriptor_template.address(), set); - return set; -} - Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) @@ -265,10 +257,11 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + const VkBuffer buffer{staging.buffer}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([this, buffer = staging.buffer, set, num_vertices](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, @@ -276,6 +269,8 @@ std::pair Uint8Pass::Assemble(u32 num_vertices, VkBuffer .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); @@ -321,10 +316,10 @@ std::pair QuadIndexedPass::Assemble( update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([this, buffer = staging.buffer, set, num_tri_vertices, base_vertex, + scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { static constexpr u32 DISPATCH_SIZE = 1024; static constexpr VkMemoryBarrier WRITE_BARRIER{ @@ -333,7 +328,9 @@ std::pair QuadIndexedPass::Assemble( .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, }; - const std::array push_constants = {base_vertex, index_shift}; + const std::array push_constants{base_vertex, index_shift}; + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), @@ -353,7 +350,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, COMPUTE_PUSH_CONSTANT_RANGE, ASTC_DECODER_COMP_SPV), - device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, + scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} ASTCDecoderPass::~ASTCDecoderPass() = default; @@ -451,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), sizeof(SWIZZLE_TABLE)); update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); - - const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); - const VkPipelineLayout vk_layout = *layout; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; // To unswizzle the ASTC data const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); ASSERT(params.origin == (std::array{0, 0, 0})); ASSERT(params.destination == (std::array{0, 0, 0})); - scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, - block_dims, params, set](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, + params, descriptor_data](vk::CommandBuffer cmdbuf) { const AstcPushConstants uniforms{ .blocks_dims = block_dims, .bytes_per_block_log2 = params.bytes_per_block_log2, @@ -470,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, .block_height = params.block_height, .block_height_mask = params.block_height_mask, }; - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); - cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); + const VkDescriptorSet set = descriptor_allocator.Commit(); + device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); + cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); }); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 54c1ac4cb..114aef2bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -36,15 +36,14 @@ public: ~ComputePass(); protected: - VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); - + const Device& device; vk::DescriptorUpdateTemplateKHR descriptor_template; vk::PipelineLayout layout; vk::Pipeline pipeline; - -private: vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; + +private: vk::ShaderModule module; }; @@ -99,7 +98,6 @@ public: private: void MakeDataBuffer(); - const Device& device; VKScheduler& scheduler; StagingBufferPool& staging_buffer_pool; VKUpdateDescriptorQueue& update_descriptor_queue; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 54a57c358..feaace0c5 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,21 +18,22 @@ namespace Vulkan { -ComputePipeline::ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, +ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, vk::ShaderModule spv_module_) - : update_descriptor_queue{update_descriptor_queue_}, info{info_}, + : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { - DescriptorLayoutBuilder builder{device.GetLogical()}; - builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); + auto func{[this, &descriptor_pool] { + DescriptorLayoutBuilder builder{device.GetLogical()}; + builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); - descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); + descriptor_update_template = + builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); - auto func{[this, &device] { const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, @@ -166,15 +167,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } - scheduler.Record([this](vk::CommandBuffer cmdbuf) { + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - }); - if (!descriptor_set_layout) { - return; - } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); - scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + + if (!descriptor_set_layout) { + return; + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, descriptor_set, nullptr); }); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 0d4cd37be..a560e382e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -40,6 +40,7 @@ public: VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); private: + const Device& device; VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 0526c197a..76080bde1 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -205,31 +205,31 @@ ConfigureFuncPtr ConfigureFunc(const std::array& m GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device, DescriptorPool& descriptor_pool, + const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, std::array stages, const std::array& infos) - : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, - buffer_cache{buffer_cache_}, scheduler{scheduler_}, + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, + texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + auto func{[this, &render_pass_cache, &descriptor_pool] { + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; + descriptor_set_layout = builder.CreateDescriptorSetLayout(); + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); - auto func{[this, &device, &render_pass_cache, builder] { const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; pipeline_layout = builder.CreatePipelineLayout(set_layout); descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); - MakePipeline(device, render_pass); + MakePipeline(render_pass); std::lock_guard lock{build_mutex}; is_built = true; @@ -440,24 +440,22 @@ void GraphicsPipeline::ConfigureDraw() { build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); }); } - if (scheduler.UpdateGraphicsPipeline(this)) { - scheduler.Record([this](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); - }); - } - if (!descriptor_set_layout) { - return; - } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue.Send(descriptor_update_template.address(), descriptor_set); - - scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; + const void* const descriptor_data{update_descriptor_queue.UpdateData()}; + scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + if (!descriptor_set_layout) { + return; + } + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, descriptor_set, nullptr); }); } -void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { +void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { dynamic = key.state.dynamic_state; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 454fc049e..85e21f611 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -109,19 +109,20 @@ private: void ConfigureDraw(); - void MakePipeline(const Device& device, VkRenderPass render_pass); + void MakePipeline(VkRenderPass render_pass); void Validate(); const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; + const Device& device; TextureCache& texture_cache; BufferCache& buffer_cache; VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; - void (*configure_func)(GraphicsPipeline*, bool); + void (*configure_func)(GraphicsPipeline*, bool){}; std::vector transition_keys; std::vector transitions; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index bea9b8012..ce3427c9b 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -36,13 +36,4 @@ void VKUpdateDescriptorQueue::Acquire() { upload_start = payload_cursor; } -void VKUpdateDescriptorQueue::Send(const VkDescriptorUpdateTemplateKHR* update_template, - VkDescriptorSet set) { - const void* const data = upload_start; - const vk::Device* const logical = &device.GetLogical(); - scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { - logical->UpdateDescriptorSet(set, *update_template, data); - }); -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 82bc9920c..d7de4c490 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -39,7 +39,9 @@ public: void Acquire(); - void Send(const VkDescriptorUpdateTemplateKHR* update_template, VkDescriptorSet set); + const DescriptorUpdateEntry* UpdateData() const noexcept { + return upload_start; + } void AddSampledImage(VkImageView image_view, VkSampler sampler) { *(payload_cursor++) = VkDescriptorImageInfo{ -- cgit v1.2.3 From 025b20f96ae588777e3ff11083cc4184bf418af6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 26 Apr 2021 03:53:26 -0300 Subject: shader: Move pipeline cache logic to separate files Move code to separate files to be able to reuse it from OpenGL. This greatly simplifies the pipeline cache logic on Vulkan. Transform feedback state is not yet abstracted and it's still intrusively stored inside vk_pipeline_cache. It will be moved when needed on OpenGL. --- src/video_core/CMakeLists.txt | 3 + src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 21 +- src/video_core/renderer_opengl/gl_shader_cache.h | 58 +- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 719 +++------------------ src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/shader_cache.cpp | 233 +++++++ src/video_core/shader_cache.h | 198 ++---- src/video_core/shader_environment.cpp | 453 +++++++++++++ src/video_core/shader_environment.h | 198 ++++++ 12 files changed, 1095 insertions(+), 824 deletions(-) create mode 100644 src/video_core/shader_cache.cpp create mode 100644 src/video_core/shader_environment.cpp create mode 100644 src/video_core/shader_environment.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3166a69dc..6e0e4b8f5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -145,7 +145,10 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h + shader_cache.cpp shader_cache.h + shader_environment.cpp + shader_environment.h shader_notify.cpp shader_notify.h surface.cpp diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1f58f8791..2fdcbe4ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -217,7 +217,7 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - ShaderCacheOpenGL shader_cache; + ShaderCache shader_cache; QueryCache query_cache; AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4dd166156..c3e490b40 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -29,18 +29,13 @@ namespace OpenGL { -Shader::Shader() = default; - -Shader::~Shader() = default; - -ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) - : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_}, - maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {} - -ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; +ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_) + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + emu_window{emu_window_}, gpu{gpu_}, device{device_} {} + +ShaderCache::~ShaderCache() = default; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index ad3d15a76..96520e17c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -36,27 +36,59 @@ class RasterizerOpenGL; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -class Shader { +struct GraphicsProgramKey { + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + }; + + std::array unique_hashes; + std::array color_formats; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, u32> tessellation_primitive; + BitField<8, 2, u32> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + u32 padding; + TransformFeedbackState xfb_state; + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsProgramKey); + } else { + return offsetof(GraphicsProgramKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsProgram { public: - explicit Shader(); - ~Shader(); +private: }; -class ShaderCacheOpenGL final : public VideoCommon::ShaderCache { +class ShaderCache : public VideoCommon::ShaderCache { public: - explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); - ~ShaderCacheOpenGL() override; + explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_); + ~ShaderCache(); private: Core::Frontend::EmuWindow& emu_window; Tegra::GPU& gpu; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 85e21f611..e362d13c5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -23,7 +23,7 @@ namespace Vulkan { struct GraphicsPipelineCacheKey { - std::array unique_hashes; + std::array unique_hashes; FixedPipelineState state; size_t Hash() const noexcept; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9d9729022..0822862fe 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -11,7 +11,8 @@ #include "common/bit_cast.h" #include "common/cityhash.h" -#include "common/file_util.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/microprofile.h" #include "common/thread_worker.h" #include "core/core.h" @@ -36,6 +37,7 @@ #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -43,449 +45,19 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -template -auto MakeSpan(Container& container) { - return std::span(container.data(), container.size()); -} - -static u64 MakeCbufKey(u32 index, u32 offset) { - return (static_cast(index) << 32) | offset; -} - -class GenericEnvironment : public Shader::Environment { -public: - explicit GenericEnvironment() = default; - explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, - u32 start_address_) - : gpu_memory{&gpu_memory_}, program_base{program_base_} { - start_address = start_address_; - } - - ~GenericEnvironment() override = default; - - u32 TextureBoundBuffer() const final { - return texture_bound; - } - - u32 LocalMemorySize() const final { - return local_memory_size; - } - - u32 SharedMemorySize() const final { - return shared_memory_size; - } - - std::array WorkgroupSize() const final { - return workgroup_size; - } - - u64 ReadInstruction(u32 address) final { - read_lowest = std::min(read_lowest, address); - read_highest = std::max(read_highest, address); - - if (address >= cached_lowest && address < cached_highest) { - return code[(address - cached_lowest) / INST_SIZE]; - } - has_unbound_instructions = true; - return gpu_memory->Read(program_base + address); - } - - std::optional Analyze() { - const std::optional size{TryFindSize()}; - if (!size) { - return std::nullopt; - } - cached_lowest = start_address; - cached_highest = start_address + static_cast(*size); - return Common::CityHash128(reinterpret_cast(code.data()), *size); - } - - void SetCachedSize(size_t size_bytes) { - cached_lowest = start_address; - cached_highest = start_address + static_cast(size_bytes); - code.resize(CachedSize()); - gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); - } - - [[nodiscard]] size_t CachedSize() const noexcept { - return cached_highest - cached_lowest + INST_SIZE; - } - - [[nodiscard]] size_t ReadSize() const noexcept { - return read_highest - read_lowest + INST_SIZE; - } - - [[nodiscard]] bool CanBeSerialized() const noexcept { - return !has_unbound_instructions; - } - - [[nodiscard]] u128 CalculateHash() const { - const size_t size{ReadSize()}; - const auto data{std::make_unique(size)}; - gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); - return Common::CityHash128(data.get(), size); - } - - void Serialize(std::ofstream& file) const { - const u64 code_size{static_cast(CachedSize())}; - const u64 num_texture_types{static_cast(texture_types.size())}; - const u64 num_cbuf_values{static_cast(cbuf_values.size())}; - - file.write(reinterpret_cast(&code_size), sizeof(code_size)) - .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) - .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) - .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) - .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) - .write(reinterpret_cast(&start_address), sizeof(start_address)) - .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) - .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) - .write(reinterpret_cast(&stage), sizeof(stage)) - .write(reinterpret_cast(code.data()), code_size); - for (const auto [key, type] : texture_types) { - file.write(reinterpret_cast(&key), sizeof(key)) - .write(reinterpret_cast(&type), sizeof(type)); - } - for (const auto [key, type] : cbuf_values) { - file.write(reinterpret_cast(&key), sizeof(key)) - .write(reinterpret_cast(&type), sizeof(type)); - } - if (stage == Shader::Stage::Compute) { - file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) - .write(reinterpret_cast(&shared_memory_size), - sizeof(shared_memory_size)); - } else { - file.write(reinterpret_cast(&sph), sizeof(sph)); - } - } - -protected: - static constexpr size_t INST_SIZE = sizeof(u64); - - std::optional TryFindSize() { - constexpr size_t BLOCK_SIZE = 0x1000; - constexpr size_t MAXIMUM_SIZE = 0x100000; - - constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - - GPUVAddr guest_addr{program_base + start_address}; - size_t offset{0}; - size_t size{BLOCK_SIZE}; - while (size <= MAXIMUM_SIZE) { - code.resize(size / INST_SIZE); - u64* const data = code.data() + offset / INST_SIZE; - gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); - for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { - const u64 inst = data[index / INST_SIZE]; - if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { - return offset + index; - } - } - guest_addr += BLOCK_SIZE; - size += BLOCK_SIZE; - offset += BLOCK_SIZE; - } - return std::nullopt; - } - - Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - u32 raw) { - const TextureHandle handle{raw, via_header_index}; - const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; - Tegra::Texture::TICEntry entry; - gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); - - const Shader::TextureType result{[&] { - switch (entry.texture_type) { - case Tegra::Texture::TextureType::Texture1D: - return Shader::TextureType::Color1D; - case Tegra::Texture::TextureType::Texture2D: - case Tegra::Texture::TextureType::Texture2DNoMipmap: - return Shader::TextureType::Color2D; - case Tegra::Texture::TextureType::Texture3D: - return Shader::TextureType::Color3D; - case Tegra::Texture::TextureType::TextureCubemap: - return Shader::TextureType::ColorCube; - case Tegra::Texture::TextureType::Texture1DArray: - return Shader::TextureType::ColorArray1D; - case Tegra::Texture::TextureType::Texture2DArray: - return Shader::TextureType::ColorArray2D; - case Tegra::Texture::TextureType::Texture1DBuffer: - return Shader::TextureType::Buffer; - case Tegra::Texture::TextureType::TextureCubeArray: - return Shader::TextureType::ColorArrayCube; - default: - throw Shader::NotImplementedException("Unknown texture type"); - } - }()}; - texture_types.emplace(raw, result); - return result; - } - - Tegra::MemoryManager* gpu_memory{}; - GPUVAddr program_base{}; - - std::vector code; - std::unordered_map texture_types; - std::unordered_map cbuf_values; - - u32 local_memory_size{}; - u32 texture_bound{}; - u32 shared_memory_size{}; - std::array workgroup_size{}; - - u32 read_lowest = std::numeric_limits::max(); - u32 read_highest = 0; - - u32 cached_lowest = std::numeric_limits::max(); - u32 cached_highest = 0; - - bool has_unbound_instructions = false; -}; - namespace { using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; +using VideoCommon::GraphicsEnvironment; -// TODO: Move this to a separate file -constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION{2}; - -class GraphicsEnvironment final : public GenericEnvironment { -public: - explicit GraphicsEnvironment() = default; - explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, - GPUVAddr program_base_, u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { - gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); - switch (program) { - case Maxwell::ShaderProgram::VertexA: - stage = Shader::Stage::VertexA; - stage_index = 0; - break; - case Maxwell::ShaderProgram::VertexB: - stage = Shader::Stage::VertexB; - stage_index = 0; - break; - case Maxwell::ShaderProgram::TesselationControl: - stage = Shader::Stage::TessellationControl; - stage_index = 1; - break; - case Maxwell::ShaderProgram::TesselationEval: - stage = Shader::Stage::TessellationEval; - stage_index = 2; - break; - case Maxwell::ShaderProgram::Geometry: - stage = Shader::Stage::Geometry; - stage_index = 3; - break; - case Maxwell::ShaderProgram::Fragment: - stage = Shader::Stage::Fragment; - stage_index = 4; - break; - default: - UNREACHABLE_MSG("Invalid program={}", program); - break; - } - const u64 local_size{sph.LocalMemorySize()}; - ASSERT(local_size <= std::numeric_limits::max()); - local_memory_size = static_cast(local_size); - texture_bound = maxwell3d->regs.tex_cb_index; - } - - ~GraphicsEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; - ASSERT(cbuf.enabled); - u32 value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read(cbuf.address + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{maxwell3d->regs}; - const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); - } - -private: - Tegra::Engines::Maxwell3D* maxwell3d{}; - size_t stage_index{}; -}; - -class ComputeEnvironment final : public GenericEnvironment { -public: - explicit ComputeEnvironment() = default; - explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, - u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ - &kepler_compute_} { - const auto& qmd{kepler_compute->launch_description}; - stage = Shader::Stage::Compute; - local_memory_size = qmd.local_pos_alloc; - texture_bound = kepler_compute->regs.tex_cb_index; - shared_memory_size = qmd.shared_alloc; - workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; - } - - ~ComputeEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& qmd{kepler_compute->launch_description}; - ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); - const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; - u32 value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read(cbuf.Address() + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{kepler_compute->regs}; - const auto& qmd{kepler_compute->launch_description}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); - } - -private: - Tegra::Engines::KeplerCompute* kepler_compute{}; -}; - -void SerializePipeline(std::span key, std::span envs, - std::ofstream& file) { - if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { - return; - } - const u32 num_envs{static_cast(envs.size())}; - file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); - for (const GenericEnvironment* const env : envs) { - env->Serialize(file); - } - file.write(key.data(), key.size_bytes()); -} - -template -void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { - try { - std::ofstream file; - file.exceptions(std::ifstream::failbit); - Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app); - if (!file.is_open()) { - LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); - return; - } - if (file.tellp() == 0) { - file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) - .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); - } - const std::span key_span(reinterpret_cast(&key), sizeof(key)); - SerializePipeline(key_span, MakeSpan(envs), file); - - } catch (const std::ios_base::failure& e) { - LOG_ERROR(Common_Filesystem, "{}", e.what()); - if (!Common::FS::Delete(filename)) { - LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); - } - } +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); } -class FileEnvironment final : public Shader::Environment { -public: - void Deserialize(std::ifstream& file) { - u64 code_size{}; - u64 num_texture_types{}; - u64 num_cbuf_values{}; - file.read(reinterpret_cast(&code_size), sizeof(code_size)) - .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) - .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) - .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) - .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) - .read(reinterpret_cast(&start_address), sizeof(start_address)) - .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) - .read(reinterpret_cast(&read_highest), sizeof(read_highest)) - .read(reinterpret_cast(&stage), sizeof(stage)); - code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); - file.read(reinterpret_cast(code.get()), code_size); - for (size_t i = 0; i < num_texture_types; ++i) { - u32 key; - Shader::TextureType type; - file.read(reinterpret_cast(&key), sizeof(key)) - .read(reinterpret_cast(&type), sizeof(type)); - texture_types.emplace(key, type); - } - for (size_t i = 0; i < num_cbuf_values; ++i) { - u64 key; - u32 value; - file.read(reinterpret_cast(&key), sizeof(key)) - .read(reinterpret_cast(&value), sizeof(value)); - cbuf_values.emplace(key, value); - } - if (stage == Shader::Stage::Compute) { - file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) - .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); - } else { - file.read(reinterpret_cast(&sph), sizeof(sph)); - } - } - - u64 ReadInstruction(u32 address) override { - if (address < read_lowest || address > read_highest) { - throw Shader::LogicError("Out of bounds address {}", address); - } - return code[(address - read_lowest) / sizeof(u64)]; - } - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; - if (it == cbuf_values.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto it{texture_types.find(handle)}; - if (it == texture_types.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - u32 LocalMemorySize() const override { - return local_memory_size; - } - - u32 SharedMemorySize() const override { - return shared_memory_size; - } - - u32 TextureBoundBuffer() const override { - return texture_bound; - } - - std::array WorkgroupSize() const override { - return workgroup_size; - } - -private: - std::unique_ptr code; - std::unordered_map texture_types; - std::unordered_map cbuf_values; - std::array workgroup_size{}; - u32 local_memory_size{}; - u32 shared_memory_size{}; - u32 texture_bound{}; - u32 read_lowest{}; - u32 read_highest{}; -}; - Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { switch (comparison) { case Maxwell::ComparisonOp::Never: @@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso } } // Anonymous namespace -void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - if (title_id == 0) { - return; - } - std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; - std::string base_dir{shader_dir + "/vulkan"}; - std::string transferable_dir{base_dir + "/transferable"}; - std::string precompiled_dir{base_dir + "/precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); - return; - } - pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); - - struct { - std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; - } state; - - std::ifstream file; - Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate); - if (!file.is_open()) { - return; - } - file.exceptions(std::ifstream::failbit); - const auto end{file.tellg()}; - file.seekg(0, std::ios::beg); - - std::array magic_number; - u32 cache_version; - file.read(magic_number.data(), magic_number.size()) - .read(reinterpret_cast(&cache_version), sizeof(cache_version)); - if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { - file.close(); - if (Common::FS::Delete(pipeline_cache_filename)) { - if (magic_number != MAGIC_NUMBER) { - LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file"); - } - if (cache_version != CACHE_VERSION) { - LOG_INFO(Render_Vulkan, "Deleting old pipeline cache"); - } - } else { - LOG_ERROR(Render_Vulkan, - "Invalid pipeline cache file and failed to delete it in \"{}\"", - pipeline_cache_filename); - } - return; - } - while (file.tellg() != end) { - if (stop_loading) { - return; - } - u32 num_envs{}; - file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); - std::vector envs(num_envs); - for (FileEnvironment& env : envs) { - env.Deserialize(file); - } - if (envs.front().ShaderStage() == Shader::Stage::Compute) { - ComputePipelineCacheKey key; - file.read(reinterpret_cast(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } else { - GraphicsPipelineCacheKey key; - file.read(reinterpret_cast(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - boost::container::static_vector env_ptrs; - for (auto& env : envs) { - env_ptrs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } - ++state.total; - } - { - std::lock_guard lock{state.mutex}; - callback(VideoCore::LoadCallbackStage::Build, 0, state.total); - state.has_loaded = true; - } - workers.WaitForRequests(); -} - size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); return static_cast(hash); @@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c return std::memcmp(&rhs, this, Size()) == 0; } -PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_) - : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), @@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default; GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - if (!RefreshStages()) { + if (!RefreshStages(graphics_key.unique_hashes)) { current_pipeline = nullptr; return nullptr; } @@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; - const auto& qmd{kepler_compute.launch_description}; - const GPUVAddr shader_addr{program_base + qmd.program_start}; - const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { - return nullptr; - } - const ShaderInfo* shader{TryGet(*cpu_shader_addr)}; + const ShaderInfo* const shader{ComputeShader()}; if (!shader) { - ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; - shader = MakeShaderInfo(env, *cpu_shader_addr); + return nullptr; } + const auto& qmd{kepler_compute.launch_description}; const ComputePipelineCacheKey key{ - .unique_hash{shader->unique_hash}, - .shared_memory_size{qmd.shared_alloc}, + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; @@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return pipeline.get(); } -bool PipelineCache::RefreshStages() { - auto& dirty{maxwell3d.dirty.flags}; - if (!dirty[VideoCommon::Dirty::Shaders]) { - return last_valid_shaders; +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; } - dirty[VideoCommon::Dirty::Shaders] = false; - - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - graphics_key.unique_hashes[index] = u128{}; - continue; - } - const auto& shader_config{maxwell3d.regs.shader_config[index]}; - const auto program{static_cast(index)}; - const GPUVAddr shader_addr{base_addr + shader_config.offset}; - const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { - LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); - last_valid_shaders = false; - return false; - } - const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; - if (!shader_info) { - const u32 start_address{shader_config.offset}; - GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; - shader_info = MakeShaderInfo(env, *cpu_shader_addr); - } - shader_infos[index] = shader_info; - graphics_key.unique_hashes[index] = shader_info->unique_hash; + auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + auto base_dir{shader_dir / "vulkan"}; + auto transferable_dir{base_dir / "transferable"}; + auto precompiled_dir{base_dir / "precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; } - last_valid_shaders = true; - return true; -} + pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); -const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { - auto info = std::make_unique(); - if (const std::optional cached_hash{env.Analyze()}) { - info->unique_hash = *cached_hash; - info->size_bytes = env.CachedSize(); - } else { - // Slow path, not really hit on commercial games - // Build a control flow graph to get the real shader size - main_pools.flow_block.ReleaseContents(); - Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; - info->unique_hash = env.CalculateHash(); - info->size_bytes = env.ReadSize(); - } - const size_t size_bytes{info->size_bytes}; - const ShaderInfo* const result{info.get()}; - Register(std::move(info), cpu_addr, size_bytes); - return result; + struct { + std::mutex mutex; + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { + ShaderPools pools; + auto pipeline{CreateComputePipeline(pools, key, env, false)}; + + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector envs) { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { + ShaderPools pools; + boost::container::static_vector env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; + + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); + + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); + + workers.WaitForRequests(); } std::unique_ptr PipelineCache::CreateGraphicsPipeline( @@ -815,7 +288,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( size_t env_index{0}; std::array programs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } Shader::Environment& env{*envs[env_index]}; @@ -830,7 +303,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( u32 binding{0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } UNIMPLEMENTED_IF(index == 0); @@ -844,8 +317,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { - const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], - key.unique_hashes[index][1])}; + const std::string name{fmt::format("{:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } } @@ -863,7 +335,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (graphics_key.unique_hashes[index] == 0) { continue; } const auto program{static_cast(index)}; @@ -871,7 +343,6 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); } auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; @@ -882,11 +353,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { boost::container::static_vector env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] != u128{}) { + if (key.unique_hashes[index] != 0) { env_ptrs.push_back(&envs[index]); } } - SerializePipeline(key, env_ptrs, pipeline_cache_filename); + VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); }); return pipeline; } @@ -902,8 +373,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { serialization_thread.QueueWork([this, key, env = std::move(env)] { - SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + VideoCommon::SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); }); } return pipeline; @@ -921,7 +392,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { - const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; + const auto name{fmt::format("{:016x}", key.unique_hash)}; spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; @@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Profile profile{base_profile}; const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != u128{}}; + const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eec17d3fd..4e48b4956 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -42,7 +43,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - u128 unique_hash; + u64 unique_hash; u32 shared_memory_size; std::array workgroup_size; @@ -76,16 +77,12 @@ namespace Vulkan { class ComputePipeline; class Device; class DescriptorPool; -class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; class VKScheduler; class VKUpdateDescriptorQueue; -struct ShaderInfo { - u128 unique_hash{}; - size_t size_bytes{}; -}; +using VideoCommon::ShaderInfo; struct ShaderPools { void ReleaseContents() { @@ -99,17 +96,16 @@ struct ShaderPools { Shader::ObjectPool flow_block; }; -class PipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache : public VideoCommon::ShaderCache { public: - explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache); - ~PipelineCache() override; + ~PipelineCache(); [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); @@ -119,10 +115,6 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: - bool RefreshStages(); - - const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); - std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -140,11 +132,6 @@ private: Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program); - Tegra::GPU& gpu; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - const Device& device; VKScheduler& scheduler; DescriptorPool& descriptor_pool; @@ -156,16 +143,13 @@ private: GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; - std::array shader_infos{}; - bool last_valid_shaders{}; - std::unordered_map> compute_cache; std::unordered_map> graphics_cache; ShaderPools main_pools; Shader::Profile base_profile; - std::string pipeline_cache_filename; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; Common::ThreadWorker serialization_thread; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7df169c85..fa6daeb3a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -149,7 +149,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, + pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, texture_cache), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp new file mode 100644 index 000000000..b8b8eace5 --- /dev/null +++ b/src/video_core/shader_cache.cpp @@ -0,0 +1,233 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include + +#include "common/assert.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" + +namespace VideoCommon { + +void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); + RemovePendingShaders(); +} + +void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { + std::lock_guard lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); +} + +void ShaderCache::SyncGuestHost() { + std::scoped_lock lock{invalidation_mutex}; + RemovePendingShaders(); +} + +ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_) + : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, + rasterizer{rasterizer_} {} + +bool ShaderCache::RefreshStages(std::array& unique_hashes) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_shaders_valid; + } + dirty[VideoCommon::Dirty::Shaders] = false; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { + unique_hashes[index] = 0; + continue; + } + const auto& shader_config{maxwell3d.regs.shader_config[index]}; + const auto program{static_cast(index)}; + const GPUVAddr shader_addr{base_addr + shader_config.offset}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + last_shaders_valid = false; + return false; + } + const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; + if (!shader_info) { + const u32 start_address{shader_config.offset}; + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + shader_info = MakeShaderInfo(env, *cpu_shader_addr); + } + shader_infos[index] = shader_info; + unique_hashes[index] = shader_info->unique_hash; + } + last_shaders_valid = true; + return true; +} + +const ShaderInfo* ShaderCache::ComputeShader() { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + qmd.program_start}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return nullptr; + } + if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) { + return shader; + } + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + return MakeShaderInfo(env, *cpu_shader_addr); +} + +ShaderInfo* ShaderCache::TryGet(VAddr addr) const { + std::scoped_lock lock{lookup_mutex}; + + const auto it = lookup_cache.find(addr); + if (it == lookup_cache.end()) { + return nullptr; + } + return it->second->data; +} + +void ShaderCache::Register(std::unique_ptr data, VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex, lookup_mutex}; + + const VAddr addr_end = addr + size; + Entry* const entry = NewEntry(addr, addr_end, data.get()); + + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + invalidation_cache[page].push_back(entry); + } + + storage.push_back(std::move(data)); + + rasterizer.UpdatePagesCachedCount(addr, size, 1); +} + +void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { + const VAddr addr_end = addr + size; + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + auto it = invalidation_cache.find(page); + if (it == invalidation_cache.end()) { + continue; + } + InvalidatePageEntries(it->second, addr, addr_end); + } +} + +void ShaderCache::RemovePendingShaders() { + if (marked_for_removal.empty()) { + return; + } + // Remove duplicates + std::ranges::sort(marked_for_removal); + marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), + marked_for_removal.end()); + + std::vector removed_shaders; + removed_shaders.reserve(marked_for_removal.size()); + + std::scoped_lock lock{lookup_mutex}; + + for (Entry* const entry : marked_for_removal) { + removed_shaders.push_back(entry->data); + + const auto it = lookup_cache.find(entry->addr_start); + ASSERT(it != lookup_cache.end()); + lookup_cache.erase(it); + } + marked_for_removal.clear(); + + if (!removed_shaders.empty()) { + RemoveShadersFromStorage(std::move(removed_shaders)); + } +} + +void ShaderCache::InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end) { + size_t index = 0; + while (index < entries.size()) { + Entry* const entry = entries[index]; + if (!entry->Overlaps(addr, addr_end)) { + ++index; + continue; + } + + UnmarkMemory(entry); + RemoveEntryFromInvalidationCache(entry); + marked_for_removal.push_back(entry); + } +} + +void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) { + const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { + const auto entries_it = invalidation_cache.find(page); + ASSERT(entries_it != invalidation_cache.end()); + std::vector& entries = entries_it->second; + + const auto entry_it = std::ranges::find(entries, entry); + ASSERT(entry_it != entries.end()); + entries.erase(entry_it); + } +} + +void ShaderCache::UnmarkMemory(Entry* entry) { + if (!entry->is_memory_marked) { + return; + } + entry->is_memory_marked = false; + + const VAddr addr = entry->addr_start; + const size_t size = entry->addr_end - addr; + rasterizer.UpdatePagesCachedCount(addr, size, -1); +} + +void ShaderCache::RemoveShadersFromStorage(std::vector removed_shaders) { + // Remove them from the cache + std::erase_if(storage, [&removed_shaders](const std::unique_ptr& shader) { + return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end(); + }); +} + +ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) { + auto entry = std::make_unique(Entry{addr, addr_end, data}); + Entry* const entry_pointer = entry.get(); + + lookup_cache.emplace(addr, std::move(entry)); + return entry_pointer; +} + +const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { + auto info = std::make_unique(); + if (const std::optional cached_hash{env.Analyze()}) { + info->unique_hash = *cached_hash; + info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + Shader::ObjectPool flow_block; + Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 015a789d6..89a4bcc84 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -4,20 +4,28 @@ #pragma once -#include #include #include #include #include #include -#include "common/assert.h" #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +namespace Tegra { +class MemoryManager; +} + namespace VideoCommon { -template +class GenericEnvironment; + +struct ShaderInfo { + u64 unique_hash{}; + size_t size_bytes{}; +}; + class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; @@ -25,206 +33,100 @@ class ShaderCache { struct Entry { VAddr addr_start; VAddr addr_end; - T* data; + ShaderInfo* data; bool is_memory_marked = true; - constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { + bool Overlaps(VAddr start, VAddr end) const noexcept { return start < addr_end && addr_start < end; } }; public: - virtual ~ShaderCache() = default; - /// @brief Removes shaders inside a given region /// @note Checks for ranges /// @param addr Start address of the invalidation /// @param size Number of bytes of the invalidation - void InvalidateRegion(VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - RemovePendingShaders(); - } + void InvalidateRegion(VAddr addr, size_t size); /// @brief Unmarks a memory region as cached and marks it for removal /// @param addr Start address of the CPU write operation /// @param size Number of bytes of the CPU write operation - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - } + void OnCPUWrite(VAddr addr, size_t size); /// @brief Flushes delayed removal operations - void SyncGuestHost() { - std::scoped_lock lock{invalidation_mutex}; - RemovePendingShaders(); - } + void SyncGuestHost(); + +protected: + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_); + + /// @brief Update the hashes and information of shader stages + /// @param unique_hashes Shader hashes to store into when a stage is enabled + /// @return True no success, false on error + bool RefreshStages(std::array& unique_hashes); + + /// @brief Returns information about the current compute shader + /// @return Pointer to a valid shader, nullptr on error + const ShaderInfo* ComputeShader(); + + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + std::array shader_infos{}; + bool last_shaders_valid = false; + +private: /// @brief Tries to obtain a cached shader starting in a given address /// @note Doesn't check for ranges, the given address has to be the start of the shader /// @param addr Start address of the shader, this doesn't cache for region /// @return Pointer to a valid shader, nullptr when nothing is found - T* TryGet(VAddr addr) const { - std::scoped_lock lock{lookup_mutex}; - - const auto it = lookup_cache.find(addr); - if (it == lookup_cache.end()) { - return nullptr; - } - return it->second->data; - } - -protected: - explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} + ShaderInfo* TryGet(VAddr addr) const; /// @brief Register in the cache a given entry /// @param data Shader to store in the cache /// @param addr Start address of the shader that will be registered /// @param size Size in bytes of the shader - void Register(std::unique_ptr data, VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex, lookup_mutex}; - - const VAddr addr_end = addr + size; - Entry* const entry = NewEntry(addr, addr_end, data.get()); - - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - invalidation_cache[page].push_back(entry); - } - - storage.push_back(std::move(data)); + void Register(std::unique_ptr data, VAddr addr, size_t size); - rasterizer.UpdatePagesCachedCount(addr, size, 1); - } - - /// @brief Called when a shader is going to be removed - /// @param shader Shader that will be removed - /// @pre invalidation_cache is locked - /// @pre lookup_mutex is locked - virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} - -private: /// @brief Invalidate pages in a given region /// @pre invalidation_mutex is locked - void InvalidatePagesInRegion(VAddr addr, std::size_t size) { - const VAddr addr_end = addr + size; - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - auto it = invalidation_cache.find(page); - if (it == invalidation_cache.end()) { - continue; - } - InvalidatePageEntries(it->second, addr, addr_end); - } - } + void InvalidatePagesInRegion(VAddr addr, size_t size); /// @brief Remove shaders marked for deletion /// @pre invalidation_mutex is locked - void RemovePendingShaders() { - if (marked_for_removal.empty()) { - return; - } - // Remove duplicates - std::sort(marked_for_removal.begin(), marked_for_removal.end()); - marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), - marked_for_removal.end()); - - std::vector removed_shaders; - removed_shaders.reserve(marked_for_removal.size()); - - std::scoped_lock lock{lookup_mutex}; - - for (Entry* const entry : marked_for_removal) { - removed_shaders.push_back(entry->data); - - const auto it = lookup_cache.find(entry->addr_start); - ASSERT(it != lookup_cache.end()); - lookup_cache.erase(it); - } - marked_for_removal.clear(); - - if (!removed_shaders.empty()) { - RemoveShadersFromStorage(std::move(removed_shaders)); - } - } + void RemovePendingShaders(); /// @brief Invalidates entries in a given range for the passed page /// @param entries Vector of entries in the page, it will be modified on overlaps /// @param addr Start address of the invalidation /// @param addr_end Non-inclusive end address of the invalidation /// @pre invalidation_mutex is locked - void InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end) { - std::size_t index = 0; - while (index < entries.size()) { - Entry* const entry = entries[index]; - if (!entry->Overlaps(addr, addr_end)) { - ++index; - continue; - } - - UnmarkMemory(entry); - RemoveEntryFromInvalidationCache(entry); - marked_for_removal.push_back(entry); - } - } + void InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end); /// @brief Removes all references to an entry in the invalidation cache /// @param entry Entry to remove from the invalidation cache /// @pre invalidation_mutex is locked - void RemoveEntryFromInvalidationCache(const Entry* entry) { - const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { - const auto entries_it = invalidation_cache.find(page); - ASSERT(entries_it != invalidation_cache.end()); - std::vector& entries = entries_it->second; - - const auto entry_it = std::find(entries.begin(), entries.end(), entry); - ASSERT(entry_it != entries.end()); - entries.erase(entry_it); - } - } + void RemoveEntryFromInvalidationCache(const Entry* entry); /// @brief Unmarks an entry from the rasterizer cache /// @param entry Entry to unmark from memory - void UnmarkMemory(Entry* entry) { - if (!entry->is_memory_marked) { - return; - } - entry->is_memory_marked = false; - - const VAddr addr = entry->addr_start; - const std::size_t size = entry->addr_end - addr; - rasterizer.UpdatePagesCachedCount(addr, size, -1); - } + void UnmarkMemory(Entry* entry); /// @brief Removes a vector of shaders from a list /// @param removed_shaders Shaders to be removed from the storage /// @pre invalidation_mutex is locked /// @pre lookup_mutex is locked - void RemoveShadersFromStorage(std::vector removed_shaders) { - // Notify removals - for (T* const shader : removed_shaders) { - OnShaderRemoval(shader); - } - - // Remove them from the cache - const auto is_removed = [&removed_shaders](const std::unique_ptr& shader) { - return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != - removed_shaders.end(); - }; - std::erase_if(storage, is_removed); - } + void RemoveShadersFromStorage(std::vector removed_shaders); /// @brief Creates a new entry in the lookup cache and returns its pointer /// @pre lookup_mutex is locked - Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { - auto entry = std::make_unique(Entry{addr, addr_end, data}); - Entry* const entry_pointer = entry.get(); + Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data); - lookup_cache.emplace(addr, std::move(entry)); - return entry_pointer; - } + /// @brief Create a new shader entry and register it + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); VideoCore::RasterizerInterface& rasterizer; @@ -233,7 +135,7 @@ private: std::unordered_map> lookup_cache; std::unordered_map> invalidation_cache; - std::vector> storage; + std::vector> storage; std::vector marked_for_removal; }; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp new file mode 100644 index 000000000..5dccc0097 --- /dev/null +++ b/src/video_core/shader_environment.cpp @@ -0,0 +1,453 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/cityhash.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "common/fs/fs.h" +#include "common/logging/log.h" +#include "shader_recompiler/environment.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_environment.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; +constexpr u32 CACHE_VERSION = 3; + +constexpr size_t INST_SIZE = sizeof(u64); + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +static u64 MakeCbufKey(u32 index, u32 offset) { + return (static_cast(index) << 32) | offset; +} + +static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { + switch (entry.texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return Shader::TextureType::Color1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return Shader::TextureType::Color2D; + case Tegra::Texture::TextureType::Texture3D: + return Shader::TextureType::Color3D; + case Tegra::Texture::TextureType::TextureCubemap: + return Shader::TextureType::ColorCube; + case Tegra::Texture::TextureType::Texture1DArray: + return Shader::TextureType::ColorArray1D; + case Tegra::Texture::TextureType::Texture2DArray: + return Shader::TextureType::ColorArray2D; + case Tegra::Texture::TextureType::Texture1DBuffer: + return Shader::TextureType::Buffer; + case Tegra::Texture::TextureType::TextureCubeArray: + return Shader::TextureType::ColorArrayCube; + default: + throw Shader::NotImplementedException("Unknown texture type"); + } +} + +GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; +} + +GenericEnvironment::~GenericEnvironment() = default; + +u32 GenericEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +u32 GenericEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 GenericEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +std::array GenericEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +u64 GenericEnvironment::ReadInstruction(u32 address) { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[(address - cached_lowest) / INST_SIZE]; + } + has_unbound_instructions = true; + return gpu_memory->Read(program_base + address); +} + +std::optional GenericEnvironment::Analyze() { + const std::optional size{TryFindSize()}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast(*size); + return Common::CityHash64(reinterpret_cast(code.data()), *size); +} + +void GenericEnvironment::SetCachedSize(size_t size_bytes) { + cached_lowest = start_address; + cached_highest = start_address + static_cast(size_bytes); + code.resize(CachedSize()); + gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); +} + +size_t GenericEnvironment::CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; +} + +size_t GenericEnvironment::ReadSize() const noexcept { + return read_highest - read_lowest + INST_SIZE; +} + +bool GenericEnvironment::CanBeSerialized() const noexcept { + return !has_unbound_instructions; +} + +u64 GenericEnvironment::CalculateHash() const { + const size_t size{ReadSize()}; + const auto data{std::make_unique(size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash64(data.get(), size); +} + +void GenericEnvironment::Serialize(std::ofstream& file) const { + const u64 code_size{static_cast(CachedSize())}; + const u64 num_texture_types{static_cast(texture_types.size())}; + const u64 num_cbuf_values{static_cast(cbuf_values.size())}; + + file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) + .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) + .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast(&start_address), sizeof(start_address)) + .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) + .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) + .write(reinterpret_cast(&stage), sizeof(stage)) + .write(reinterpret_cast(code.data()), code_size); + for (const auto [key, type] : texture_types) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } + if (stage == Shader::Stage::Compute) { + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.write(reinterpret_cast(&sph), sizeof(sph)); + } +} + +std::optional GenericEnvironment::TryFindSize() { + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + GPUVAddr guest_addr{program_base + start_address}; + size_t offset{0}; + size_t size{BLOCK_SIZE}; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { + const u64 inst = data[index / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + index; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; +} + +Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, + bool via_header_index, u32 raw) { + const TextureHandle handle{raw, via_header_index}; + const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + const Shader::TextureType result{ConvertType(entry)}; + texture_types.emplace(raw, result); + return result; +} + +GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Maxwell::ShaderProgram program, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + stage_index = 0; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + stage_index = 0; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + stage_index = 1; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + stage_index = 2; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + stage_index = 3; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + stage_index = 4; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + break; + } + const u64 local_size{sph.LocalMemorySize()}; + ASSERT(local_size <= std::numeric_limits::max()); + local_memory_size = static_cast(local_size); + texture_bound = maxwell3d->regs.tex_cb_index; +} + +u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); +} + +ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { + const auto& qmd{kepler_compute->launch_description}; + stage = Shader::Stage::Compute; + local_memory_size = qmd.local_pos_alloc; + texture_bound = kepler_compute->regs.tex_cb_index; + shared_memory_size = qmd.shared_alloc; + workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; +} + +u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); +} + +void FileEnvironment::Deserialize(std::ifstream& file) { + u64 code_size{}; + u64 num_texture_types{}; + u64 num_cbuf_values{}; + file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) + .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) + .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast(&start_address), sizeof(start_address)) + .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast(&stage), sizeof(stage)); + code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast(code.get()), code_size); + for (size_t i = 0; i < num_texture_types; ++i) { + u32 key; + Shader::TextureType type; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&type), sizeof(type)); + texture_types.emplace(key, type); + } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.read(reinterpret_cast(&sph), sizeof(sph)); + } +} + +u64 FileEnvironment::ReadInstruction(u32 address) { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; +} + +u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { + const auto it{texture_types.find(handle)}; + if (it == texture_types.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +u32 FileEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 FileEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +u32 FileEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +std::array FileEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +void SerializePipeline(std::span key, std::span envs, + const std::filesystem::path& filename) try { + std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); + file.exceptions(std::ifstream::failbit); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + return; + } + if (file.tellp() == 0) { + // Write header + file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) + .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); + } + if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast(envs.size())}; + file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, + Common::UniqueFunction load_compute, + Common::UniqueFunction> load_graphics) try { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + + std::array magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast(&cache_version), sizeof(cache_version)); + if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + file.close(); + if (Common::FS::RemoveFile(filename)) { + if (magic_number != MAGIC_NUMBER) { + LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); + } + if (cache_version != CACHE_VERSION) { + LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); + } + } else { + LOG_ERROR(Common_Filesystem, + "Invalid pipeline cache file and failed to delete it in \"{}\"", + Common::FS::PathToUTF8String(filename)); + } + return; + } + while (file.tellg() != end) { + if (stop_loading.stop_requested()) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); + std::vector envs(num_envs); + for (FileEnvironment& env : envs) { + env.Deserialize(file); + } + if (envs.front().ShaderStage() == Shader::Stage::Compute) { + load_compute(file, std::move(envs.front())); + } else { + load_graphics(file, std::move(envs)); + } + } + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h new file mode 100644 index 000000000..37d712045 --- /dev/null +++ b/src/video_core/shader_environment.h @@ -0,0 +1,198 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "common/unique_function.h" +#include "shader_recompiler/environment.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/textures/texture.h" + +namespace Tegra { +class Memorymanager; +} + +namespace VideoCommon { + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + if (via_header_index) { + image = data; + sampler = data; + } else { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + } + + u32 image; + u32 sampler; +}; + +class GenericEnvironment : public Shader::Environment { +public: + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~GenericEnvironment() override; + + [[nodiscard]] u32 TextureBoundBuffer() const final; + + [[nodiscard]] u32 LocalMemorySize() const final; + + [[nodiscard]] u32 SharedMemorySize() const final; + + [[nodiscard]] std::array WorkgroupSize() const final; + + [[nodiscard]] u64 ReadInstruction(u32 address) final; + + [[nodiscard]] std::optional Analyze(); + + void SetCachedSize(size_t size_bytes); + + [[nodiscard]] size_t CachedSize() const noexcept; + + [[nodiscard]] size_t ReadSize() const noexcept; + + [[nodiscard]] bool CanBeSerialized() const noexcept; + + [[nodiscard]] u64 CalculateHash() const; + + void Serialize(std::ofstream& file) const; + +protected: + std::optional TryFindSize(); + + Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, + u32 raw); + + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector code; + std::unordered_map texture_types; + std::unordered_map cbuf_values; + + u32 local_memory_size{}; + u32 texture_bound{}; + u32 shared_memory_size{}; + std::array workgroup_size{}; + + u32 read_lowest = std::numeric_limits::max(); + u32 read_highest = 0; + + u32 cached_lowest = std::numeric_limits::max(); + u32 cached_highest = 0; + + bool has_unbound_instructions = false; +}; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D::Regs::ShaderProgram program, + GPUVAddr program_base_, u32 start_address_); + + ~GraphicsEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; + size_t stage_index{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~ComputeEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; + +class FileEnvironment final : public Shader::Environment { +public: + FileEnvironment() = default; + ~FileEnvironment() override = default; + + FileEnvironment& operator=(FileEnvironment&&) noexcept = default; + FileEnvironment(FileEnvironment&&) noexcept = default; + + FileEnvironment& operator=(const FileEnvironment&) = delete; + FileEnvironment(const FileEnvironment&) = delete; + + void Deserialize(std::ifstream& file); + + [[nodiscard]] u64 ReadInstruction(u32 address) override; + + [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override; + + [[nodiscard]] u32 LocalMemorySize() const override; + + [[nodiscard]] u32 SharedMemorySize() const override; + + [[nodiscard]] u32 TextureBoundBuffer() const override; + + [[nodiscard]] std::array WorkgroupSize() const override; + +private: + std::unique_ptr code; + std::unordered_map texture_types; + std::unordered_map cbuf_values; + std::array workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; + +void SerializePipeline(std::span key, std::span envs, + const std::filesystem::path& filename); + +template +void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) { + static_assert(std::is_trivially_copyable_v); + static_assert(std::has_unique_object_representations_v); + SerializePipeline(std::span(reinterpret_cast(&key), sizeof(key)), + std::span(envs.data(), envs.size()), filename); +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, + Common::UniqueFunction load_compute, + Common::UniqueFunction> load_graphics); + +} // namespace VideoCommon -- cgit v1.2.3 From c5425b38c1a4d7eae270780d8b3ba66231015038 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 26 Apr 2021 05:18:26 -0300 Subject: vk_compute_pass: Fix -Wshadow warning --- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 7e5ba283b..8e426ce2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -238,10 +238,10 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, ComputePass::~ComputePass() = default; -Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, DescriptorPool& descriptor_pool, - StagingBufferPool& staging_buffer_pool_, +Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_, + DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : ComputePass(device, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, + : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, VULKAN_UINT8_COMP_SPV), scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, -- cgit v1.2.3 From 53acdda772a8b7650c46ba9d998119b8c8e30844 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 26 Apr 2021 22:11:31 -0300 Subject: vk_scheduler: Allow command submission on worker thread This changes how Scheduler::Flush works. It queues the current command buffer to be sent to the GPU but does not do it immediately. The Vulkan worker thread takes care of that. Users will have to use Scheduler::Flush + Scheduler::WaitWorker to get the previous behavior. Scheduler::Finish is unchanged. To avoid waiting on work never queued, Scheduler::Wait sends the current command buffer if that's what the caller wants to wait. --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 75 +++++++------ src/video_core/renderer_vulkan/vk_blit_screen.cpp | 94 ++++++++-------- src/video_core/renderer_vulkan/vk_query_cache.cpp | 9 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 7 +- src/video_core/renderer_vulkan/vk_scheduler.cpp | 120 +++++++++++---------- src/video_core/renderer_vulkan/vk_scheduler.h | 15 +++ src/video_core/renderer_vulkan/vk_swapchain.cpp | 39 +++---- src/video_core/renderer_vulkan/vk_swapchain.h | 23 ++-- 8 files changed, 200 insertions(+), 182 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index bec3a81d9..7e39b65bd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,19 +97,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), - telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), - gpu(gpu_), - library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), - memory_allocator(device, false), - state_tracker(gpu), - scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), + state_tracker(gpu), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, @@ -130,35 +125,47 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; } - const auto& layout = render_window.GetFramebufferLayout(); - if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { - const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; - const bool use_accelerated = - rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); - const bool is_srgb = use_accelerated && screen_info.is_srgb; - if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); - } - - scheduler.WaitWorker(); - - while (!swapchain.AcquireNextImage()) { - swapchain.Create(layout.width, layout.height, is_srgb); - blit_screen.Recreate(); + SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); + if (!render_window.IsShown()) { + return; + } + const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; + const bool use_accelerated = + rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); + const bool is_srgb = use_accelerated && screen_info.is_srgb; + + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); + bool has_been_recreated = false; + const auto recreate_swapchain = [&] { + if (!has_been_recreated) { + has_been_recreated = true; + scheduler.WaitWorker(); } - const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); - - scheduler.Flush(render_semaphore); - - if (swapchain.Present(render_semaphore)) { - blit_screen.Recreate(); + swapchain.Create(layout.width, layout.height, is_srgb); + }; + if (swapchain.NeedsRecreate() || + swapchain.HasDifferentLayout(layout.width, layout.height, is_srgb)) { + recreate_swapchain(); + } + bool needs_recreate; + do { + needs_recreate = false; + swapchain.AcquireNextImage(); + if (swapchain.NeedsRecreate()) { + recreate_swapchain(); + needs_recreate = true; } - gpu.RendererFrameEndNotify(); - rasterizer.TickFrame(); + } while (needs_recreate); + if (has_been_recreated) { + blit_screen.Recreate(); } + const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); + scheduler.Flush(render_semaphore); + scheduler.WaitWorker(); + swapchain.Present(render_semaphore); - render_window.OnFrameDisplayed(); + gpu.RendererFrameEndNotify(); + rasterizer.TickFrame(); } void RendererVulkan::Report() const { diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 363134129..516f428e7 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -184,47 +184,43 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .depth = 1, }, }; - scheduler.Record( - [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - VkImageMemoryBarrier read_barrier = base_barrier; - read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkImageMemoryBarrier write_barrier = base_barrier; - write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); - cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); - }); + scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { + const VkImage image = *raw_images[image_index]; + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); + }); } - scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], - descriptor_set = descriptor_sets[image_index], buffer = *buffer, - size = swapchain.GetSize(), pipeline = *pipeline, - layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) { const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; @@ -234,8 +230,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool const VkRenderPassBeginInfo renderpass_bi{ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .pNext = nullptr, - .renderPass = renderpass, - .framebuffer = framebuffer, + .renderPass = *renderpass, + .framebuffer = *framebuffers[image_index], .renderArea = { .offset = {0, 0}, @@ -257,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool .extent = size, }; cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); cmdbuf.SetViewport(0, viewport); cmdbuf.SetScissor(0, scissor); - cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); + cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_sets[image_index], {}); cmdbuf.Draw(4, 1, 0, 0); cmdbuf.EndRenderPass(); }); @@ -304,8 +301,7 @@ void VKBlitScreen::CreateShaders() { void VKBlitScreen::CreateSemaphores() { semaphores.resize(image_count); - std::generate(semaphores.begin(), semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); }); } void VKBlitScreen::CreateDescriptorPool() { @@ -633,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() { } void VKBlitScreen::ReleaseRawImages() { - for (std::size_t i = 0; i < raw_images.size(); ++i) { - scheduler.Wait(resource_ticks.at(i)); + for (const u64 tick : resource_ticks) { + scheduler.Wait(tick); } raw_images.clear(); raw_buffer_commits.clear(); diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 7cadd5147..1dd78328c 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -114,10 +114,13 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - if (tick >= cache.GetScheduler().CurrentTick()) { - cache.GetScheduler().Flush(); + auto& scheduler{cache.GetScheduler()}; + if (tick >= scheduler.CurrentTick()) { + scheduler.Flush(); + // This may not be necessary, but it's better to play it safe and assume drivers don't + // support wait before signal on vkGetQueryPoolResults + scheduler.WaitWorker(); } - u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( query.first, query.second, 1, sizeof(data), &data, sizeof(data), diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fa6daeb3a..0f15ad2f7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -452,10 +452,11 @@ void RasterizerVulkan::TiledCacheBarrier() { } void RasterizerVulkan::FlushCommands() { - if (draw_counter > 0) { - draw_counter = 0; - scheduler.Flush(); + if (draw_counter == 0) { + return; } + draw_counter = 0; + scheduler.Flush(); } void RasterizerVulkan::TickFrame() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 25a4933e5..81cb330d9 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { command->~Command(); command = next; } - + submit = false; command_offset = 0; first = nullptr; last = nullptr; @@ -42,7 +42,7 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) master_semaphore{std::make_unique(device)}, command_pool{std::make_unique(*master_semaphore, device)} { AcquireNewChunk(); - AllocateNewContext(); + AllocateWorkerCommandBuffer(); worker_thread = std::thread(&VKScheduler::WorkerThread, this); } @@ -60,6 +60,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) { void VKScheduler::Finish(VkSemaphore semaphore) { const u64 presubmit_tick = CurrentTick(); SubmitExecution(semaphore); + WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); } @@ -140,75 +141,82 @@ void VKScheduler::WorkerThread() { if (quit) { continue; } - auto extracted_chunk = std::move(chunk_queue.Front()); - chunk_queue.Pop(); - extracted_chunk->ExecuteAll(current_cmdbuf); - chunk_reserve.Push(std::move(extracted_chunk)); + while (!chunk_queue.Empty()) { + auto extracted_chunk = std::move(chunk_queue.Front()); + chunk_queue.Pop(); + const bool has_submit = extracted_chunk->HasSubmit(); + extracted_chunk->ExecuteAll(current_cmdbuf); + if (has_submit) { + AllocateWorkerCommandBuffer(); + } + chunk_reserve.Push(std::move(extracted_chunk)); + } } while (!quit); } +void VKScheduler::AllocateWorkerCommandBuffer() { + current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); + current_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); +} + void VKScheduler::SubmitExecution(VkSemaphore semaphore) { EndPendingOperations(); InvalidateState(); - WaitWorker(); - - std::unique_lock lock{mutex}; - - current_cmdbuf.End(); - - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); - const u32 num_signal_semaphores = semaphore ? 2U : 1U; const u64 signal_value = master_semaphore->CurrentTick(); - const u64 wait_value = signal_value - 1; - const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - master_semaphore->NextTick(); - const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, semaphore}; + Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + cmdbuf.End(); - const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, - .pNext = nullptr, - .waitSemaphoreValueCount = 1, - .pWaitSemaphoreValues = &wait_value, - .signalSemaphoreValueCount = num_signal_semaphores, - .pSignalSemaphoreValues = signal_values.data(), - }; - const VkSubmitInfo submit_info{ - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &timeline_si, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &timeline_semaphore, - .pWaitDstStageMask = &wait_stage_mask, - .commandBufferCount = 1, - .pCommandBuffers = current_cmdbuf.address(), - .signalSemaphoreCount = num_signal_semaphores, - .pSignalSemaphores = signal_semaphores.data(), - }; - switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { - case VK_SUCCESS: - break; - case VK_ERROR_DEVICE_LOST: - device.ReportLoss(); - [[fallthrough]]; - default: - vk::Check(result); - } -} + const u32 num_signal_semaphores = semaphore ? 2U : 1U; -void VKScheduler::AllocateNewContext() { - std::unique_lock lock{mutex}; + const u64 wait_value = signal_value - 1; + const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); - current_cmdbuf.Begin({ - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .pNext = nullptr, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - .pInheritanceInfo = nullptr, + const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + const std::array signal_values{signal_value, u64(0)}; + const std::array signal_semaphores{timeline_semaphore, semaphore}; + + const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreValueCount = 1, + .pWaitSemaphoreValues = &wait_value, + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = &timeline_si, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &timeline_semaphore, + .pWaitDstStageMask = &wait_stage_mask, + .commandBufferCount = 1, + .pCommandBuffers = cmdbuf.address(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { + case VK_SUCCESS: + break; + case VK_ERROR_DEVICE_LOST: + device.ReportLoss(); + [[fallthrough]]; + default: + vk::Check(result); + } }); + chunk->MarkSubmit(); + DispatchWork(); +} +void VKScheduler::AllocateNewContext() { // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { query_cache->UpdateCounters(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index a40bb8bcd..40215c4c5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -86,6 +86,10 @@ public: /// Waits for the given tick to trigger on the GPU. void Wait(u64 tick) { + if (tick >= master_semaphore->CurrentTick()) { + // Make sure we are not waiting for the current tick without signalling + Flush(); + } master_semaphore->Wait(tick); } @@ -155,15 +159,24 @@ private: return true; } + void MarkSubmit() { + submit = true; + } + bool Empty() const { return command_offset == 0; } + bool HasSubmit() const { + return submit; + } + private: Command* first = nullptr; Command* last = nullptr; size_t command_offset = 0; + bool submit = false; alignas(std::max_align_t) std::array data{}; }; @@ -176,6 +189,8 @@ private: void WorkerThread(); + void AllocateWorkerCommandBuffer(); + void SubmitExecution(VkSemaphore semaphore); void AllocateNewContext(); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index dfd5c65ba..a71b0b01e 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -65,6 +65,8 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { + needs_recreate = false; + const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { @@ -82,21 +84,20 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { resource_ticks.resize(image_count); } -bool VKSwapchain::AcquireNextImage() { +void VKSwapchain::AcquireNextImage() { const VkResult result = device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits::max(), *present_semaphores[frame_index], {}, &image_index); + needs_recreate |= result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR; scheduler.Wait(resource_ticks[image_index]); - return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; + resource_ticks[image_index] = scheduler.CurrentTick(); } -bool VKSwapchain::Present(VkSemaphore render_semaphore) { +void VKSwapchain::Present(VkSemaphore render_semaphore) { const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; const std::array semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; - bool recreated = false; - const VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, @@ -107,7 +108,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { .pImageIndices = &image_index, .pResults = nullptr, }; - switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: break; @@ -115,24 +115,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; case VK_ERROR_OUT_OF_DATE_KHR: - if (current_width > 0 && current_height > 0) { - Create(current_width, current_height, current_srgb); - recreated = true; - } + needs_recreate = true; break; default: LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); break; } - - resource_ticks[image_index] = scheduler.CurrentTick(); - frame_index = (frame_index + 1) % static_cast(image_count); - return recreated; -} - -bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const { - // TODO(Rodrigo): Handle framebuffer pixel format changes - return framebuffer.width != current_width || framebuffer.height != current_height; + ++frame_index; + if (frame_index >= image_count) { + frame_index = 0; + } } void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, @@ -148,7 +140,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { requested_image_count = capabilities.maxImageCount; } - VkSwapchainCreateInfoKHR swapchain_ci{ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, .pNext = nullptr, @@ -169,7 +160,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, .clipped = VK_FALSE, .oldSwapchain = nullptr, }; - const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; const std::array queue_indices{graphics_family, present_family}; @@ -178,7 +168,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.queueFamilyIndexCount = static_cast(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); } - // Request the size again to reduce the possibility of a TOCTOU race condition. const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); @@ -186,8 +175,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); extent = swapchain_ci.imageExtent; - current_width = extent.width; - current_height = extent.height; current_srgb = srgb; images = swapchain.GetImages(); @@ -197,8 +184,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, void VKSwapchain::CreateSemaphores() { present_semaphores.resize(image_count); - std::generate(present_semaphores.begin(), present_semaphores.end(), - [this] { return device.GetLogical().CreateSemaphore(); }); + std::ranges::generate(present_semaphores, + [this] { return device.GetLogical().CreateSemaphore(); }); } void VKSwapchain::CreateImageViews() { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index adc8d27cf..b38fd9dc2 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -28,14 +28,20 @@ public: void Create(u32 width, u32 height, bool srgb); /// Acquires the next image in the swapchain, waits as needed. - bool AcquireNextImage(); + void AcquireNextImage(); - /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be - /// recreated. Takes responsability for the ownership of fence. - bool Present(VkSemaphore render_semaphore); + /// Presents the rendered image to the swapchain. + void Present(VkSemaphore render_semaphore); /// Returns true when the framebuffer layout has changed. - bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; + bool HasDifferentLayout(u32 width, u32 height, bool is_srgb) const { + return extent.width != width || extent.height != height || current_srgb != is_srgb; + } + + /// Returns true when the image has to be recreated. + bool NeedsRecreate() const { + return needs_recreate; + } VkExtent2D GetSize() const { return extent; @@ -61,10 +67,6 @@ public: return image_format; } - bool GetSrgbState() const { - return current_srgb; - } - private: void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, bool srgb); @@ -92,9 +94,8 @@ private: VkFormat image_format{}; VkExtent2D extent{}; - u32 current_width{}; - u32 current_height{}; bool current_srgb{}; + bool needs_recreate{}; }; } // namespace Vulkan -- cgit v1.2.3 From f4b82b8dd70a57b5a828bcdbecf9aefd1bd240b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 1 May 2021 00:29:31 -0300 Subject: vk_graphics_pipeline: Fix texture buffer descriptors --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 76080bde1..9f5d30fe8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -253,6 +253,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; std::array samplers; + size_t sampler_index{}; size_t image_index{}; texture_cache.SynchronizeGraphicsDescriptors(); @@ -312,11 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index] = handle.image; + image_view_indices[image_index++] = handle.image; Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; - samplers[image_index] = sampler->Handle(); - ++image_index; + samplers[sampler_index++] = sampler->Handle(); } } if constexpr (Spec::has_images) { @@ -360,10 +360,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { ++texture_buffer_index; } }}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + const Shader::Info& info{stage_infos[stage]}; - if constexpr (Spec::has_texture_buffers || Spec::has_image_buffers) { - buffer_cache.UnbindGraphicsTextureBuffers(stage); - } if constexpr (Spec::has_texture_buffers) { for (const auto& desc : info.texture_buffer_descriptors) { add_buffer(desc); @@ -443,7 +442,9 @@ void GraphicsPipeline::ConfigureDraw() { const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + if (bind_pipeline) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + } if (!descriptor_set_layout) { return; } -- cgit v1.2.3 From b541f5e5e333a8ec8c3569e02d67e59ad14217c2 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Mon, 19 Apr 2021 01:03:38 +0200 Subject: shader: Implement VertexA stage --- src/shader_recompiler/CMakeLists.txt | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_control_flow.cpp | 4 ++ .../frontend/ir/microinstruction.cpp | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 1 + src/shader_recompiler/frontend/maxwell/program.cpp | 28 ++++++++ src/shader_recompiler/frontend/maxwell/program.h | 2 + src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 74 ++++++++++++++++++++++ .../global_memory_to_storage_buffer_pass.cpp | 26 ++++++++ src/shader_recompiler/ir_opt/passes.h | 7 ++ src/shader_recompiler/ir_opt/texture_pass.cpp | 21 ++++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 17 ++++- 12 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 src/shader_recompiler/ir_opt/dual_vertex_pass.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 051e5d05a..151733090 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -162,6 +162,7 @@ add_library(shader_recompiler STATIC ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp + ir_opt/dual_vertex_pass.cpp ir_opt/global_memory_to_storage_buffer_pass.cpp ir_opt/identity_removal_pass.cpp ir_opt/lower_fp16_to_fp32.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4562db45b..c352bbd84 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -25,6 +25,7 @@ void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id fal void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); void EmitSelectionMerge(EmitContext& ctx, Id merge_label); void EmitReturn(EmitContext& ctx); +void EmitJoin(EmitContext& ctx); void EmitUnreachable(EmitContext& ctx); void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); void EmitBarrier(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 335603f88..d3a1db340 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -26,6 +26,10 @@ void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } +void EmitJoin(EmitContext&) { + throw NotImplementedException("Join shouldn't be emitted"); +} + void EmitUnreachable(EmitContext& ctx) { ctx.OpUnreachable(); } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 5c1b02d53..dba902186 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -61,6 +61,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::LoopMerge: case Opcode::SelectionMerge: case Opcode::Return: + case Opcode::Join: case Opcode::Unreachable: case Opcode::DemoteToHelperInvocation: case Opcode::Barrier: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8f32c9e74..b14719c51 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -13,6 +13,7 @@ OPCODE(BranchConditional, Void, U1, OPCODE(LoopMerge, Void, Label, Label, ) OPCODE(SelectionMerge, Void, Label, ) OPCODE(Return, Void, ) +OPCODE(Join, Void, ) OPCODE(Unreachable, Void, ) OPCODE(DemoteToHelperInvocation, Void, Label, ) diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index aee96eae3..59897cb3e 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -150,4 +150,32 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg); +[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp new file mode 100644 index 000000000..f35c6478a --- /dev/null +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -0,0 +1,74 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include + +#include "common/bit_cast.h" +#include "common/bit_util.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { + +void VertexATransformPass(IR::Program& program) { + bool replaced_join{}; + bool eliminated_epilogue{}; + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + switch (inst.GetOpcode()) { + case IR::Opcode::Return: + inst.ReplaceOpcode(IR::Opcode::Join); + replaced_join = true; + break; + case IR::Opcode::Epilogue: + inst.Invalidate(); + eliminated_epilogue = true; + break; + default: + break; + } + if (replaced_join && eliminated_epilogue) { + return; + } + } + } +} + +void VertexBTransformPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Prologue) { + return inst.Invalidate(); + } + } + } +} + +void DualVertexJoinPass(IR::Program& program) { + const auto& blocks = program.blocks; + s64 s = static_cast(blocks.size()) - 1; + if (s < 1) { + throw NotImplementedException("Dual Vertex Join pass failed, expected atleast 2 blocks!"); + } + for (s64 index = 0; index < s; index++) { + IR::Block* const current_block = blocks[index]; + IR::Block* const next_block = blocks[index + 1]; + for (IR::Inst& inst : current_block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Join) { + IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; + ir.Branch(next_block); + inst.Invalidate(); + // only 1 join should exist + return; + } + } + } + throw NotImplementedException("Dual Vertex Join pass failed, no join present!"); +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 87eca2a0d..1d11a00d8 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -499,4 +499,30 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { } } +template +static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { + // TODO: Handle arrays + const auto it{std::ranges::find_if(descriptors, pred)}; + if (it != descriptors.end()) { + return static_cast(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(desc); + return static_cast(descriptors.size()) - 1; +} + +void JoinStorageInfo(Info& base, Info& source) { + auto& descriptors = base.storage_buffers_descriptors; + for (auto& desc : source.storage_buffers_descriptors) { + auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) { + return desc.cbuf_index == existing.cbuf_index && + desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count; + })}; + if (it != descriptors.end()) { + it->is_written |= desc.is_written; + continue; + } + descriptors.push_back(desc); + } +} + } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 186104713..e9cb8546a 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -22,4 +22,11 @@ void SsaRewritePass(IR::Program& program); void TexturePass(Environment& env, IR::Program& program); void VerificationPass(const IR::Program& program); +// Dual Vertex +void VertexATransformPass(IR::Program& program); +void VertexBTransformPass(IR::Program& program); +void DualVertexJoinPass(IR::Program& program); +void JoinTextureInfo(Info& base, Info& source); +void JoinStorageInfo(Info& base, Info& source); + } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index cfa6b34b9..2b38bcf42 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -426,4 +426,25 @@ void TexturePass(Environment& env, IR::Program& program) { } } +void JoinTextureInfo(Info& base, Info& source) { + Descriptors descriptors{ + base.texture_buffer_descriptors, + base.image_buffer_descriptors, + base.texture_descriptors, + base.image_descriptors, + }; + for (auto& desc : source.texture_buffer_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.image_buffer_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.texture_descriptors) { + descriptors.Add(desc); + } + for (auto& desc : source.image_descriptors) { + descriptors.Add(desc); + } +} + } // namespace Shader::Optimization diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0822862fe..638475251 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -47,6 +47,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); namespace { using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; @@ -287,22 +288,32 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; + bool uses_vertex_a{}; + std::size_t start_value_processing{}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } + uses_vertex_a |= index == 0; Shader::Environment& env{*envs[env_index]}; ++env_index; const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; - Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + continue; + } + Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); + start_value_processing = 1; } std::array infos{}; std::array modules; u32 binding{0}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } -- cgit v1.2.3 From c49d56c931471f21d475a31272164fbfae5ea64a Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sat, 1 May 2021 14:56:25 +0200 Subject: shader: Address feedback --- .../frontend/maxwell/control_flow.cpp | 10 +++--- src/shader_recompiler/frontend/maxwell/program.cpp | 37 +++++++++++----------- src/shader_recompiler/frontend/maxwell/program.h | 1 + src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 20 ++++++------ .../renderer_vulkan/vk_pipeline_cache.cpp | 18 +++++------ 5 files changed, 42 insertions(+), 44 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp index 298faa03e..e7abea82f 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp @@ -209,9 +209,9 @@ CFG::CFG(Environment& env_, ObjectPool& block_pool_, Location start_addre } } if (exits_to_dispatcher) { - const auto it = functions[0].blocks.rbegin(); - dispatch_block->begin = it->end + 1; - dispatch_block->end = it->end + 1; + const auto last_block{functions[0].blocks.rbegin()}; + dispatch_block->begin = last_block->end + 1; + dispatch_block->end = last_block->end + 1; functions[0].blocks.insert(*dispatch_block); } } @@ -481,7 +481,7 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati return AnalysisState::Continue; } if (exits_to_dispatcher && function_id != 0) { - throw NotImplementedException("Dispatch EXIT on external function."); + throw NotImplementedException("Dispatch EXIT on external function"); } if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { if (block->stack.Peek(Token::PEXIT).has_value()) { @@ -490,9 +490,9 @@ CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Locati const IR::Condition cond{flow_test, static_cast(pred.index), pred.negated}; if (exits_to_dispatcher) { block->end = pc; - block->branch_true = dispatch_block; block->end_class = EndClass::Branch; block->cond = cond; + block->branch_true = dispatch_block; block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); return AnalysisState::Branch; } diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 59897cb3e..a4fa4319d 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -151,31 +151,30 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool #include -#include -#include #include "common/bit_cast.h" #include "common/bit_util.h" @@ -40,7 +38,7 @@ void VertexATransformPass(IR::Program& program) { } void VertexBTransformPass(IR::Program& program) { - for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { + for (IR::Block* const block : program.blocks) { for (IR::Inst& inst : block->Instructions()) { if (inst.GetOpcode() == IR::Opcode::Prologue) { return inst.Invalidate(); @@ -51,24 +49,24 @@ void VertexBTransformPass(IR::Program& program) { void DualVertexJoinPass(IR::Program& program) { const auto& blocks = program.blocks; - s64 s = static_cast(blocks.size()) - 1; - if (s < 1) { - throw NotImplementedException("Dual Vertex Join pass failed, expected atleast 2 blocks!"); + const s64 sub_size = static_cast(blocks.size()) - 1; + if (sub_size < 1) { + throw LogicError("Dual Vertex Join pass failed, expected atleast 2 blocks"); } - for (s64 index = 0; index < s; index++) { - IR::Block* const current_block = blocks[index]; - IR::Block* const next_block = blocks[index + 1]; + for (s64 index = 0; index < sub_size; ++index) { + IR::Block* const current_block{blocks[index]}; + IR::Block* const next_block{blocks[index + 1]}; for (IR::Inst& inst : current_block->Instructions()) { if (inst.GetOpcode() == IR::Opcode::Join) { IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; ir.Branch(next_block); inst.Invalidate(); - // only 1 join should exist + // Only 1 join should exist return; } } } - throw NotImplementedException("Dual Vertex Join pass failed, no join present!"); + throw LogicError("Dual Vertex Join pass failed, no join present"); } } // namespace Shader::Optimization diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 638475251..634bbb450 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -288,32 +288,32 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; - bool uses_vertex_a{}; - std::size_t start_value_processing{}; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } - uses_vertex_a |= index == 0; Shader::Environment& env{*envs[env_index]}; ++env_index; const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { + // Normal path programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); - continue; + } else { + // VertexB path when VertexA is present. + Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; - programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); - start_value_processing = 1; } std::array infos{}; std::array modules; u32 binding{0}; - for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) { + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } -- cgit v1.2.3 From 48a17298d76cd8ed3bf2b53aca1e1ac097693976 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 03:58:11 -0300 Subject: spirv: Support OpenGL uniform buffers and change bindings --- .../backend/spirv/emit_context.cpp | 102 +++++++++++++-------- src/shader_recompiler/backend/spirv/emit_context.h | 15 ++- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 2 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 98 ++++++++++++++++---- .../renderer_vulkan/vk_pipeline_cache.cpp | 7 +- 6 files changed, 168 insertions(+), 58 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 0459c3925..0eb400223 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -441,8 +441,13 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& binding) +EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& binding) : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { + const bool is_unified{profile.unified_descriptor_binding}; + u32& uniform_binding{is_unified ? binding.unified : binding.uniform_buffer}; + u32& storage_binding{is_unified ? binding.unified : binding.storage_buffer}; + u32& texture_binding{is_unified ? binding.unified : binding.texture}; + u32& image_binding{is_unified ? binding.unified : binding.image}; AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); @@ -450,12 +455,12 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineLocalMemory(program); DefineSharedMemory(program); DefineSharedMemoryFunctions(program); - DefineConstantBuffers(program.info, binding); - DefineStorageBuffers(program.info, binding); - DefineTextureBuffers(program.info, binding); - DefineImageBuffers(program.info, binding); - DefineTextures(program.info, binding); - DefineImages(program.info, binding); + DefineConstantBuffers(program.info, uniform_binding); + DefineStorageBuffers(program.info, storage_binding); + DefineTextureBuffers(program.info, texture_binding); + DefineImageBuffers(program.info, image_binding); + DefineTextures(program.info, texture_binding); + DefineImages(program.info, image_binding); DefineAttributeMemAccess(program.info); DefineGlobalMemoryFunctions(program.info); DefineLabels(program); @@ -489,6 +494,20 @@ Id EmitContext::Def(const IR::Value& value) { } } +Id EmitContext::BitOffset8(const IR::Value& offset) { + if (offset.IsImmediate()) { + return Const((offset.U32() % 4) * 8); + } + return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(24u)); +} + +Id EmitContext::BitOffset16(const IR::Value& offset) { + if (offset.IsImmediate()) { + return Const(((offset.U32() / 2) % 2) * 16); + } + return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u)); +} + void EmitContext::DefineCommonTypes(const Info& info) { void_id = TypeVoid(); @@ -496,6 +515,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); + S32.Define(*this, TypeInt(32, true), "s32"); private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32"); @@ -889,28 +909,36 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; } - if (True(info.used_constant_buffer_types & IR::Type::U8)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); - DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); - } - if (True(info.used_constant_buffer_types & IR::Type::U16)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', sizeof(u16)); - DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', sizeof(s16)); - } - if (True(info.used_constant_buffer_types & IR::Type::U32)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', - sizeof(u32)); - } - if (True(info.used_constant_buffer_types & IR::Type::F32)) { - DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', - sizeof(f32)); - } - if (True(info.used_constant_buffer_types & IR::Type::U32x2)) { - DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', - sizeof(u32[2])); - } - for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { - binding += desc.count; + if (profile.support_descriptor_aliasing) { + if (True(info.used_constant_buffer_types & IR::Type::U8)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); + DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); + } + if (True(info.used_constant_buffer_types & IR::Type::U16)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', + sizeof(u16)); + DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', + sizeof(s16)); + } + if (True(info.used_constant_buffer_types & IR::Type::U32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', + sizeof(u32)); + } + if (True(info.used_constant_buffer_types & IR::Type::F32)) { + DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', + sizeof(f32)); + } + if (True(info.used_constant_buffer_types & IR::Type::U32x2)) { + DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', + sizeof(u32[2])); + } + binding += static_cast(info.constant_buffer_descriptors.size()); + } else { + DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u', + sizeof(u32[4])); + for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + binding += desc.count; + } } } @@ -920,35 +948,37 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { } AddExtension("SPV_KHR_storage_buffer_storage_class"); - if (True(info.used_storage_buffer_types & IR::Type::U8)) { + const IR::Type used_types{profile.support_descriptor_aliasing ? info.used_storage_buffer_types + : IR::Type::U32}; + if (True(used_types & IR::Type::U8)) { DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8, sizeof(u8)); DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8, sizeof(u8)); } - if (True(info.used_storage_buffer_types & IR::Type::U16)) { + if (True(used_types & IR::Type::U16)) { DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16, sizeof(u16)); DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16, sizeof(u16)); } - if (True(info.used_storage_buffer_types & IR::Type::U32)) { + if (True(used_types & IR::Type::U32)) { DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1], sizeof(u32)); } - if (True(info.used_storage_buffer_types & IR::Type::F32)) { + if (True(used_types & IR::Type::F32)) { DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1], sizeof(f32)); } - if (True(info.used_storage_buffer_types & IR::Type::U64)) { + if (True(used_types & IR::Type::U64)) { DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64, sizeof(u64)); } - if (True(info.used_storage_buffer_types & IR::Type::U32x2)) { + if (True(used_types & IR::Type::U32x2)) { DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2], sizeof(u32[2])); } - if (True(info.used_storage_buffer_types & IR::Type::U32x4)) { + if (True(used_types & IR::Type::U32x4)) { DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4], sizeof(u32[4])); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index c52544fb7..baf12c217 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -17,6 +17,14 @@ namespace Shader::Backend::SPIRV { using Sirit::Id; +struct Bindings { + u32 unified{}; + u32 uniform_buffer{}; + u32 storage_buffer{}; + u32 texture{}; + u32 image{}; +}; + class VectorTypes { public: void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); @@ -62,6 +70,7 @@ struct UniformDefinitions { Id U32{}; Id F32{}; Id U32x2{}; + Id U32x4{}; }; struct StorageTypeDefinition { @@ -101,11 +110,14 @@ struct GenericElementInfo { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding); + explicit EmitContext(const Profile& profile, IR::Program& program, Bindings& binding); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); + [[nodiscard]] Id BitOffset8(const IR::Value& offset); + [[nodiscard]] Id BitOffset16(const IR::Value& offset); + Id Const(u32 value) { return Constant(U32[1], value); } @@ -139,6 +151,7 @@ public: Id U64{}; VectorTypes F32; VectorTypes U32; + VectorTypes S32; VectorTypes F16; VectorTypes F64; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 815b3cd95..0cb075670 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -368,7 +368,7 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, IR::Program& program, u32& binding) { +std::vector EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& binding) { EmitContext ctx{profile, program, binding}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index c352bbd84..8f6482b7b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -14,7 +14,7 @@ namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, IR::Program& program, - u32& binding); + Bindings& binding); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 29da2ef70..ef32184ea 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -121,7 +121,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { } Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, - const IR::Value& binding, const IR::Value& offset) { + const IR::Value& binding, const IR::Value& offset, bool check_alignment = true) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); } @@ -137,13 +137,31 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; return ctx.OpLoad(result_type, access_chain); } - if (offset.U32() % element_size != 0) { + if (check_alignment && offset.U32() % element_size != 0) { throw NotImplementedException("Unaligned immediate constant buffer load"); } const Id imm_offset{ctx.Const(offset.U32() / element_size)}; const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; return ctx.OpLoad(result_type, access_chain); } + +Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset, + false); +} + +Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) { + if (offset.IsImmediate()) { + const u32 element{(offset.U32() / 4) % 4 + index_offset}; + return ctx.OpCompositeExtract(ctx.U32[1], vector, element); + } + const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; + Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; + if (index_offset > 0) { + element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); + } + return ctx.OpVectorExtractDynamic(ctx.U32[1], vector, element); +} } // Anonymous namespace void EmitGetRegister(EmitContext&) { @@ -179,40 +197,86 @@ void EmitGetIndirectBranchVariable(EmitContext&) { } Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; - return ctx.OpUConvert(ctx.U32[1], load); + if (ctx.profile.support_descriptor_aliasing) { + const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + const Id element{GetCbufElement(ctx, vector, offset, 0u)}; + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); + } } Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; - return ctx.OpSConvert(ctx.U32[1], load); + if (ctx.profile.support_descriptor_aliasing) { + const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + const Id element{GetCbufElement(ctx, vector, offset, 0u)}; + const Id bit_offset{ctx.BitOffset8(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); + } } Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - const Id load{GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; - return ctx.OpUConvert(ctx.U32[1], load); + if (ctx.profile.support_descriptor_aliasing) { + const Id load{ + GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; + return ctx.OpUConvert(ctx.U32[1], load); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + const Id element{GetCbufElement(ctx, vector, offset, 0u)}; + const Id bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); + } } Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - const Id load{GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; - return ctx.OpSConvert(ctx.U32[1], load); + if (ctx.profile.support_descriptor_aliasing) { + const Id load{ + GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; + return ctx.OpSConvert(ctx.U32[1], load); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + const Id element{GetCbufElement(ctx, vector, offset, 0u)}; + const Id bit_offset{ctx.BitOffset16(offset)}; + return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); + } } Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return GetCbufElement(ctx, vector, offset, 0u); + } } Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset); + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u)); + } } Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { - return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset); + if (ctx.profile.support_descriptor_aliasing) { + return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, + offset); + } else { + const Id vector{GetCbufU32x4(ctx, binding, offset)}; + return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u), + GetCbufElement(ctx, vector, offset, 1u)); + } } Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const u32 element{static_cast(attr) % 4}; - const auto element_id{[&] { return ctx.Const(element); }}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; @@ -221,7 +285,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { return ctx.Const(0.0f); } const Id generic_id{ctx.input_generics.at(index)}; - const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, element_id())}; + const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; const Id value{ctx.OpLoad(type->id, pointer)}; return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; } @@ -232,8 +296,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { case IR::Attribute::PositionY: case IR::Attribute::PositionZ: case IR::Attribute::PositionW: - return ctx.OpLoad( - ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, element_id())); + return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, + ctx.Const(element))); case IR::Attribute::InstanceId: if (ctx.profile.support_vertex_instance_id) { return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 634bbb450..1334882b5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -127,6 +127,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw base_profile = Shader::Profile{ .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, + .support_descriptor_aliasing = true, .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == @@ -149,9 +150,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device.IsExtShaderViewportIndexLayerSupported(), .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), + .support_demote_to_helper_invocation = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, + .has_broken_unsigned_image_offsets = false, .generic_input_types{}, .fixed_state_point_size{}, .alpha_test_func{}, @@ -312,7 +315,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; - u32 binding{0}; + Shader::Backend::SPIRV::Bindings binding; for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -398,7 +401,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - u32 binding{0}; + Shader::Backend::SPIRV::Bindings binding; const std::vector code{EmitSPIRV(base_profile, program, binding)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; -- cgit v1.2.3 From d621e96d0de212cc16897eadf71e8a1b2e1eb5dc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:28:34 -0300 Subject: shader: Initial OpenGL implementation --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 + src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + .../translate/impl/move_special_register.cpp | 7 + src/video_core/CMakeLists.txt | 4 + src/video_core/buffer_cache/buffer_cache.h | 53 ++-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 37 ++- src/video_core/renderer_opengl/gl_buffer_cache.h | 40 ++- .../renderer_opengl/gl_compute_program.cpp | 178 +++++++++++++ .../renderer_opengl/gl_compute_program.h | 83 ++++++ src/video_core/renderer_opengl/gl_device.cpp | 89 ------- src/video_core/renderer_opengl/gl_device.h | 16 -- .../renderer_opengl/gl_graphics_program.cpp | 296 +++++++++++++++++++++ .../renderer_opengl/gl_graphics_program.h | 105 ++++++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 23 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 275 ++++++++++++++++++- src/video_core/renderer_opengl/gl_shader_cache.h | 98 ++++--- .../renderer_opengl/gl_shader_manager.cpp | 146 ---------- src/video_core/renderer_opengl/gl_shader_manager.h | 73 +---- .../renderer_opengl/gl_texture_cache.cpp | 257 ++++++------------ src/video_core/renderer_opengl/gl_texture_cache.h | 29 +- src/video_core/renderer_opengl/maxwell_to_gl.h | 108 ++++++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 17 +- src/video_core/renderer_opengl/renderer_opengl.h | 5 +- src/video_core/renderer_opengl/util_shaders.cpp | 13 +- src/video_core/renderer_vulkan/pipeline_helper.h | 17 -- src/video_core/renderer_vulkan/vk_buffer_cache.h | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 22 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 22 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 23 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 11 - src/video_core/shader_cache.cpp | 17 ++ src/video_core/shader_cache.h | 23 +- src/video_core/shader_environment.cpp | 4 +- src/video_core/shader_environment.h | 16 -- src/video_core/texture_cache/formatter.cpp | 4 +- src/video_core/texture_cache/formatter.h | 3 +- src/video_core/textures/texture.h | 9 + src/video_core/vulkan_common/vulkan_device.cpp | 2 +- 38 files changed, 1427 insertions(+), 705 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_compute_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_compute_program.h create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b3c9fe72a..5913fdeff 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() { return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; } +Value IREmitter::LocalInvocationId() { + return Inst(Opcode::LocalInvocationId); +} + U32 IREmitter::LocalInvocationIdX() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4441c495d..a12919283 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -95,6 +95,7 @@ public: [[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdZ(); + [[nodiscard]] Value LocalInvocationId(); [[nodiscard]] U32 LocalInvocationIdX(); [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index b0baff74b..01fb6f5e5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -120,6 +120,13 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_INVOCATION_INFO: // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); return ir.Imm32(0x00ff'0000); + case SpecialRegister::SR_TID: { + const IR::Value tid{ir.LocalInvocationId()}; + return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, + IR::U32{ir.CompositeExtract(tid, 1)}, + ir.Imm32(16), ir.Imm32(8)), + IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); + } case SpecialRegister::SR_TID_X: return ir.LocalInvocationIdX(); case SpecialRegister::SR_TID_Y: diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6e0e4b8f5..b008c37c0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -67,10 +67,14 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h + renderer_opengl/gl_compute_program.cpp + renderer_opengl/gl_compute_program.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h + renderer_opengl/gl_graphics_program.cpp + renderer_opengl/gl_graphics_program.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 29746f61d..6c92e4c30 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -70,8 +70,8 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; + static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -154,7 +154,7 @@ public: void UnbindGraphicsTextureBuffers(size_t stage); void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written); + PixelFormat format, bool is_written, bool is_image); void UnbindComputeStorageBuffers(); @@ -164,7 +164,7 @@ public: void UnbindComputeTextureBuffers(); void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, - bool is_written); + bool is_written, bool is_image); void FlushCachedWrites(); @@ -197,6 +197,7 @@ public: [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); std::mutex mutex; + Runtime& runtime; private: template @@ -366,7 +367,6 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - Runtime& runtime; SlotVector slot_buffers; DelayedDestructionRing delayed_destruction_ring; @@ -394,8 +394,10 @@ private: std::array enabled_texture_buffers{}; std::array written_texture_buffers{}; + std::array image_texture_buffers{}; u32 enabled_compute_texture_buffers = 0; u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; std::array fast_bound_uniform_buffers{}; @@ -431,8 +433,8 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_) - : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); deletion_iterator = slot_buffers.end(); @@ -703,13 +705,18 @@ template void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { enabled_texture_buffers[stage] = 0; written_texture_buffers[stage] = 0; + image_texture_buffers[stage] = 0; } template void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, - u32 size, PixelFormat format, bool is_written) { + u32 size, PixelFormat format, bool is_written, + bool is_image) { enabled_texture_buffers[stage] |= 1U << tbo_index; written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; + } texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -717,6 +724,7 @@ template void BufferCache

::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; written_compute_storage_buffers = 0; + image_compute_texture_buffers = 0; } template @@ -737,13 +745,17 @@ template void BufferCache

::UnbindComputeTextureBuffers() { enabled_compute_texture_buffers = 0; written_compute_texture_buffers = 0; + image_compute_texture_buffers = 0; } template void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written) { + PixelFormat format, bool is_written, bool is_image) { enabled_compute_texture_buffers |= 1U << tbo_index; written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + } compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -1057,7 +1069,6 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { template void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { - u32 binding_index = 0; ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { const TextureBufferBinding& binding = texture_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1066,9 +1077,12 @@ void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_texture_buffers[stage] >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1139,7 +1153,6 @@ void BufferCache

::BindHostComputeStorageBuffers() { template void BufferCache

::BindHostComputeTextureBuffers() { - u32 binding_index = 0; ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { const TextureBufferBinding& binding = compute_texture_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1148,9 +1161,12 @@ void BufferCache

::BindHostComputeTextureBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_compute_texture_buffers >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1339,11 +1355,10 @@ void BufferCache

::UpdateComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer Binding& binding = compute_storage_buffers[index]; - const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); - binding.buffer_id = buffer_id; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed if (((written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c4189fb60..2d0ef1307 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -2,14 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL { namespace { +using VideoCore::Surface::PixelFormat; + struct BindlessSSBO { GLuint64EXT address; GLsizei length; @@ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept { glMakeNamedBufferResidentNV(buffer.handle, access); } +GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return it->texture.handle; + } + OGLTexture texture; + texture.Create(GL_TEXTURE_BUFFER); + const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; + glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); + views.push_back({ + .offset = offset, + .size = size, + .format = format, + .texture = std::move(texture), + }); + return views.back().texture.handle; +} + BufferCacheRuntime::BufferCacheRuntime(const Device& device_) : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, @@ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, static_cast(size)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast(&ssbo)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; + const GLuint base_binding = graphics_base_storage_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast(offset), static_cast(size)); } +void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + PixelFormat format) { + *texture_handles++ = buffer.View(offset, size, format); +} + +void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { + *image_handles++ = buffer.View(offset, size, format); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index ddcce5e97..4986c65fd 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -32,6 +32,8 @@ public: void MakeResident(GLenum access) noexcept; + [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { return address; } @@ -41,9 +43,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + OGLTexture texture; + }; + GLuint64EXT address = 0; OGLBuffer buffer; GLenum current_residency_access = GL_NONE; + std::vector views; }; class BufferCacheRuntime { @@ -75,13 +85,19 @@ public: void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + + void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { if (use_assembly_shaders) { const GLuint handle = fast_uniforms[stage][binding_index].handle; const GLsizeiptr gl_size = static_cast(size); glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, fast_uniforms[stage][binding_index].handle, 0, @@ -103,7 +119,7 @@ public: std::span BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { const auto [mapped_span, offset] = stream_buffer->Request(static_cast(size)); - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), static_cast(offset), static_cast(size)); @@ -118,6 +134,19 @@ public: return has_fast_buffer_sub_data; } + void SetBaseUniformBindings(const std::array& bindings) { + graphics_base_uniform_bindings = bindings; + } + + void SetBaseStorageBindings(const std::array& bindings) { + graphics_base_storage_bindings = bindings; + } + + void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) { + texture_handles = texture_handles_; + image_handles = image_handles_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -133,6 +162,11 @@ private: u32 max_attributes = 0; + std::array graphics_base_uniform_bindings{}; + std::array graphics_base_storage_bindings{}; + GLuint* texture_handles = nullptr; + GLuint* image_handles = nullptr; + std::optional stream_buffer; std::array, @@ -155,8 +189,8 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; }; using BufferCache = VideoCommon::BufferCache; diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp new file mode 100644 index 000000000..d5ef65439 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -0,0 +1,178 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 16; + +size_t ComputeProgramKey::Hash() const noexcept { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof *this)); +} + +bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; +} + +ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + kepler_compute{kepler_compute_}, + program_manager{program_manager_}, program{std::move(program_)}, info{info_} { + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers += desc.count; + } + u32 num_textures = num_texture_buffers; + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + ASSERT(num_textures <= MAX_TEXTURES); + + u32 num_images = num_image_buffers; + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + ASSERT(num_images <= MAX_IMAGES); +} + +void ComputeProgram::Configure() { + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); + ++ssbo_index; + } + texture_cache.SynchronizeComputeDescriptors(); + + std::array image_view_ids; + boost::container::static_vector image_view_indices; + std::array samplers; + std::array textures; + std::array images; + GLsizei sampler_binding{}; + GLsizei texture_binding{}; + GLsizei image_binding{}; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + samplers[sampler_binding++] = 0; + } + } + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers[sampler_binding++] = sampler->Handle(); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + buffer_cache.UnbindComputeTextureBuffers(); + size_t texbuf_index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texbuf_index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + + buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); + buffer_cache.BindHostComputeBuffers(); + + const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + texture_binding += num_texture_buffers; + image_binding += num_image_buffers; + + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h new file mode 100644 index 000000000..64a75d44d --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.h @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class KeplerCompute; +} + +namespace Shader { +struct Info; +} + +namespace OpenGL { + +class ProgramManager; + +struct ComputeProgramKey { + u64 unique_hash; + u32 shared_memory_size; + std::array workgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputeProgramKey&) const noexcept; + + bool operator!=(const ComputeProgramKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class ComputeProgram { +public: + explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_); + + void Configure(); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::KeplerCompute& kepler_compute; + ProgramManager& program_manager; + + OGLProgram program; + Shader::Info info; + + u32 num_texture_buffers{}; + u32 num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3b00614e7..18bbc4c1f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -22,34 +22,11 @@ namespace OpenGL { namespace { -// One uniform block is reserved for emulation purposes -constexpr u32 ReservedUniformBlocks = 1; - -constexpr u32 NumStages = 5; - constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, }; -constexpr std::array LIMIT_SSBOS = { - GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, - GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, -}; -constexpr std::array LIMIT_SAMPLERS = { - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, -}; -constexpr std::array LIMIT_IMAGES = { - GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, - GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, -}; template T GetInteger(GLenum pname) { @@ -82,15 +59,6 @@ bool HasExtension(std::span extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { - ASSERT(num >= amount); - if (limit) { - amount = std::min(amount, GetInteger(*limit)); - } - num -= amount; - return std::exchange(base, base + amount); -} - std::array BuildMaxUniformBuffers() noexcept { std::array max; std::ranges::transform(LIMIT_UBOS, max.begin(), @@ -98,62 +66,6 @@ std::array BuildMaxUniformBuffers() noexcep return max; } -std::array BuildBaseBindings() noexcept { - std::array bindings; - - static constexpr std::array stage_swizzle{0, 1, 2, 3, 4}; - const u32 total_ubos = GetInteger(GL_MAX_UNIFORM_BUFFER_BINDINGS); - const u32 total_ssbos = GetInteger(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); - const u32 total_samplers = GetInteger(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); - - u32 num_ubos = total_ubos - ReservedUniformBlocks; - u32 num_ssbos = total_ssbos; - u32 num_samplers = total_samplers; - - u32 base_ubo = ReservedUniformBlocks; - u32 base_ssbo = 0; - u32 base_samplers = 0; - - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), - Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, - LIMIT_SAMPLERS[stage])}; - } - - u32 num_images = GetInteger(GL_MAX_IMAGE_UNITS); - u32 base_images = 0; - - // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. - // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the - // fragment stage, and at least 1 for the rest of the stages. - // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. - - // Reserve at least 4 image bindings on the fragment stage. - bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); - - // This is guaranteed to be at least 1. - const u32 total_extracted_images = num_images / (NumStages - 1); - - // Reserve the other image bindings. - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - if (stage == 4) { - continue; - } - bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); - } - - // Compute doesn't care about any of this. - bindings[5] = {0, 0, 0, 0}; - - return bindings; -} - bool IsASTCSupported() { static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; static constexpr std::array formats = { @@ -225,7 +137,6 @@ Device::Device() { } max_uniform_buffers = BuildMaxUniformBuffers(); - base_bindings = BuildBaseBindings(); uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2c2b13767..152a3acd3 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -12,13 +12,6 @@ namespace OpenGL { class Device { public: - struct BaseBindings { - u32 uniform_buffer{}; - u32 shader_storage_buffer{}; - u32 sampler{}; - u32 image{}; - }; - explicit Device(); explicit Device(std::nullptr_t); @@ -28,14 +21,6 @@ public: return max_uniform_buffers[static_cast(shader_type)]; } - const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { - return base_bindings[stage_index]; - } - - const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { - return GetBaseBindings(static_cast(shader_type)); - } - size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } @@ -134,7 +119,6 @@ private: std::string vendor_name; std::array max_uniform_buffers{}; - std::array base_bindings{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp new file mode 100644 index 000000000..fd0958719 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -0,0 +1,296 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 8; + +size_t GraphicsProgramKey::Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); +} + +bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; +} + +GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + const std::array& infos) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, program{std::move(program_)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + u32 num_textures{}; + u32 num_images{}; + for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { + const auto& info{stage_infos[stage]}; + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + for (const auto& desc : info.constant_buffer_descriptors) { + base_uniform_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.storage_buffers_descriptors) { + base_storage_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers[stage] += desc.count; + num_textures += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers[stage] += desc.count; + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); +} + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +void GraphicsProgram::Configure(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_view_index{}; + GLsizei sampler_binding{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); + buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + samplers[sampler_binding++] = 0; + } + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_binding++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_view_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); + + const ImageId* views_it{image_view_ids.data()}; + GLsizei texture_binding = 0; + GLsizei image_binding = 0; + std::array textures; + std::array images; + const auto prepare_stage{[&](size_t stage) { + buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); + buffer_cache.BindHostStageBuffers(stage); + + texture_binding += num_texture_buffers[stage]; + image_binding += num_image_buffers[stage]; + + const auto& info{stage_infos[stage]}; + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + texture_cache.UpdateRenderTargets(false); + + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h new file mode 100644 index 000000000..5adf3f41e --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace OpenGL { + +class ProgramManager; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +struct GraphicsProgramKey { + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + }; + + std::array unique_hashes; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; + BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + std::array padding; + TransformFeedbackState xfb_state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsProgramKey&) const noexcept; + + bool operator!=(const GraphicsProgramKey& rhs) const noexcept { + return !operator==(rhs); + } + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsProgramKey); + } else { + return offsetof(GraphicsProgramKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsProgram { +public: + explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, const std::array& infos); + + void Configure(bool is_indexed); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + ProgramManager& program_manager; + StateTracker& state_tracker; + + OGLProgram program; + std::array stage_infos{}; + std::array base_uniform_bindings{}; + std::array base_storage_bindings{}; + std::array num_texture_buffers{}; + std::array num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dd1937863..e527b76ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), + shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, + buffer_cache, program_manager, state_tracker), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} @@ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - // Setup shaders and their used resources. - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; - texture_cache.UpdateRenderTargets(false); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindGraphicsPipeline(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + program->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(primitive_mode); @@ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { num_instances, base_instance); } } - EndTransformFeedback(); ++num_queued_commands; @@ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; + if (!program) { + return; + } + program->Configure(); + const auto& qmd{kepler_compute.launch_description}; + glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); + ++num_queued_commands; } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); - screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c3e490b40..c9ca1f005 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -16,6 +16,11 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" @@ -25,17 +30,281 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" namespace OpenGL { +namespace { +// FIXME: Move this somewhere else +const Shader::Profile profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = true, + .support_viewport_mask = true, + .support_typeless_image_loads = true, + .support_demote_to_helper_invocation = false, + .warp_size_potentially_larger_than_guest = true, + .support_int64_atomics = false, + .lower_left_origin_mode = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + + .generic_input_types = {}, + .convert_depth_mode = false, + .force_early_z = false, + + .tess_primitive = {}, + .tess_spacing = {}, + .tess_clockwise = false, + + .input_topology = Shader::InputTopology::Triangles, + + .fixed_state_point_size = std::nullopt, + + .alpha_test_func = Shader::CompareFunction::Always, + .alpha_test_reference = 0.0f, + + .y_negate = false, + + .xfb_varyings = {}, +}; + +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::GraphicsEnvironment; + +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} + +void AddShader(GLenum stage, GLuint program, std::span code) { + OGLShader shader; + shader.handle = glCreateShader(stage); + + glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + glAttachShader(program, shader.handle); + if (!Settings::values.renderer_debug) { + return; + } + GLint shader_status{}; + glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +void LinkProgram(GLuint program) { + glLinkProgram(program); + if (!Settings::values.renderer_debug) { + return; + } + GLint link_status{}; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); + + GLint log_length{}; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} +} // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, - emu_window{emu_window_}, gpu{gpu_}, device{device_} {} + emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ + state_tracker_} {} ShaderCache::~ShaderCache() = default; +GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { + if (!RefreshStages(graphics_key.unique_hashes)) { + return nullptr; + } + const auto& regs{maxwell3d.regs}; + graphics_key.raw = 0; + graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 + ? regs.draw.topology.Value() + : Maxwell::PrimitiveTopology{}); + graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); + graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); + graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& program{pair->second}; + if (is_new) { + program = CreateGraphicsProgram(); + } + return program.get(); +} + +ComputeProgram* ShaderCache::CurrentComputeProgram() { + const VideoCommon::ShaderInfo* const shader{ComputeShader()}; + if (!shader) { + return nullptr; + } + const auto& qmd{kepler_compute.launch_description}; + const ComputeProgramKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return pipeline.get(); + } + pipeline = CreateComputeProgram(key, shader); + return pipeline.get(); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); + + main_pools.ReleaseContents(); + return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, std::span envs, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + size_t env_index{0}; + std::array programs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + } + std::array infos{}; + + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + + Shader::Backend::SPIRV::Bindings binding; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + Shader::IR::Program& program{programs[index]}; + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + + const std::vector code{EmitSPIRV(profile, program, binding)}; + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + AddShader(Stage(stage_index), gl_program.handle, code); + } + LinkProgram(gl_program.handle); + + return std::make_unique(texture_cache, buffer_cache, gpu_memory, maxwell3d, + program_manager, state_tracker, std::move(gl_program), + infos); +} + +std::unique_ptr ShaderCache::CreateComputeProgram( + const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + + main_pools.ReleaseContents(); + return CreateComputeProgram(main_pools, key, env, true); +} + +std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + Shader::Environment& env, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + + Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + Shader::Backend::SPIRV::Bindings binding; + const std::vector code{EmitSPIRV(profile, program, binding)}; + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); + LinkProgram(gl_program.handle); + return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, + program_manager, std::move(gl_program), program.info); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 96520e17c..b479d073a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,20 +5,18 @@ #pragma once #include -#include -#include -#include -#include -#include #include -#include -#include #include #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -32,64 +30,62 @@ class EmuWindow; namespace OpenGL { class Device; +class ProgramManager; class RasterizerOpenGL; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsProgramKey { - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - }; - - std::array unique_hashes; - std::array color_formats; - union { - u32 raw; - BitField<0, 1, u32> xfb_enabled; - BitField<1, 1, u32> early_z; - BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; - BitField<6, 2, u32> tessellation_primitive; - BitField<8, 2, u32> tessellation_spacing; - BitField<10, 1, u32> tessellation_clockwise; - }; - u32 padding; - TransformFeedbackState xfb_state; - - [[nodiscard]] size_t Size() const noexcept { - if (xfb_enabled != 0) { - return sizeof(GraphicsProgramKey); - } else { - return offsetof(GraphicsProgramKey, padding); - } +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); -class GraphicsProgram { -public: -private: + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; }; class ShaderCache : public VideoCommon::ShaderCache { public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); + [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); + + [[nodiscard]] ComputeProgram* CurrentComputeProgram(); + private: + std::unique_ptr CreateGraphicsProgram(); + + std::unique_ptr CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, + std::span envs, bool build_in_parallel); + + std::unique_ptr CreateComputeProgram(const ComputeProgramKey& key, + const VideoCommon::ShaderInfo* shader); + + std::unique_ptr CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + Shader::Environment& env, + bool build_in_parallel); + Core::Frontend::EmuWindow& emu_window; - Tegra::GPU& gpu; const Device& device; + TextureCache& texture_cache; + BufferCache& buffer_cache; + ProgramManager& program_manager; + StateTracker& state_tracker; + + GraphicsProgramKey graphics_key{}; + + ShaderPools main_pools; + std::unordered_map> graphics_cache; + std::unordered_map> compute_cache; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 553e6e8d6..399959afb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,149 +1,3 @@ // Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. - -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" - -namespace OpenGL { - -namespace { - -void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); -} - -} // Anonymous namespace - -ProgramManager::ProgramManager(const Device& device) - : use_assembly_programs{device.UseAssemblyShaders()} { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } else { - graphics_pipeline.Create(); - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -ProgramManager::~ProgramManager() = default; - -void ProgramManager::BindCompute(GLuint program) { - if (use_assembly_programs) { - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } else { - is_graphics_bound = false; - glUseProgram(program); - } -} - -void ProgramManager::BindGraphicsPipeline() { - if (!use_assembly_programs) { - UpdateSourcePrograms(); - } -} - -void ProgramManager::BindHostPipeline(GLuint pipeline) { - if (use_assembly_programs) { - if (geometry_enabled) { - geometry_enabled = false; - old_state.geometry = 0; - glDisable(GL_GEOMETRY_PROGRAM_NV); - } - } else { - if (!is_graphics_bound) { - glUseProgram(0); - } - } - glBindProgramPipeline(pipeline); -} - -void ProgramManager::RestoreGuestPipeline() { - if (use_assembly_programs) { - glBindProgramPipeline(0); - } else { - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -void ProgramManager::BindHostCompute(GLuint program) { - if (use_assembly_programs) { - glDisable(GL_COMPUTE_PROGRAM_NV); - } - glUseProgram(program); - is_graphics_bound = false; -} - -void ProgramManager::RestoreGuestCompute() { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - glUseProgram(0); - } -} - -void ProgramManager::UseVertexShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); - } - current_state.vertex = program; -} - -void ProgramManager::UseGeometryShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); - } - current_state.geometry = program; -} - -void ProgramManager::UseFragmentShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); - } - current_state.fragment = program; -} - -void ProgramManager::UpdateSourcePrograms() { - if (!is_graphics_bound) { - is_graphics_bound = true; - glUseProgram(0); - } - - const GLuint handle = graphics_pipeline.handle; - const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { - if (current == old) { - return; - } - glUseProgramStages(handle, stage, current); - }; - update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); - update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); - update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); - - old_state = current_state; -} - -void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { - const auto& regs = maxwell.regs; - - // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. - y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index ad42cce74..70781d6f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,79 +4,24 @@ #pragma once -#include - #include -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" - namespace OpenGL { -class Device; - -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -/// Not following that rule will cause problems on some AMD drivers. -struct alignas(16) MaxwellUniformData { - void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); - - GLfloat y_direction; -}; -static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); -static_assert(sizeof(MaxwellUniformData) < 16384, - "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); - class ProgramManager { public: - explicit ProgramManager(const Device& device); - ~ProgramManager(); - - /// Binds a compute program - void BindCompute(GLuint program); - - /// Updates bound programs. - void BindGraphicsPipeline(); - - /// Binds an OpenGL pipeline object unsynchronized with the guest state. - void BindHostPipeline(GLuint pipeline); + void BindProgram(GLuint program) { + if (bound_program == program) { + return; + } + bound_program = program; + glUseProgram(program); + } - /// Rewinds BindHostPipeline state changes. - void RestoreGuestPipeline(); - - /// Binds an OpenGL GLSL program object unsynchronized with the guest state. - void BindHostCompute(GLuint program); - - /// Rewinds BindHostCompute state changes. - void RestoreGuestCompute(); - - void UseVertexShader(GLuint program); - void UseGeometryShader(GLuint program); - void UseFragmentShader(GLuint program); + void RestoreGuestCompute() {} private: - struct PipelineState { - GLuint vertex = 0; - GLuint geometry = 0; - GLuint fragment = 0; - }; - - /// Update GLSL programs. - void UpdateSourcePrograms(); - - OGLPipeline graphics_pipeline; - - PipelineState current_state; - PipelineState old_state; - - bool use_assembly_programs = false; - - bool is_graphics_bound = true; - - bool vertex_enabled = false; - bool geometry_enabled = false; - bool fragment_enabled = false; + GLuint bound_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a8bf84218..7053be161 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -24,9 +24,7 @@ #include "video_core/textures/decoders.h" namespace OpenGL { - namespace { - using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; using Tegra::Texture::TextureType; @@ -59,107 +57,6 @@ struct CopyRegion { GLsizei depth; }; -struct FormatTuple { - GLenum internal_format; - GLenum format = GL_NONE; - GLenum type = GL_NONE; -}; - -constexpr std::array FORMAT_TABLE = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, - GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT -}}; - constexpr std::array ACCELERATED_FORMATS{ GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, @@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{ GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, }; -const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); - return FORMAT_TABLE[static_cast(pixel_format)]; -} - GLenum ImageTarget(const VideoCommon::ImageInfo& info) { switch (info.type) { case ImageType::e1D: @@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) { return GL_NONE; } -GLenum ImageTarget(ImageViewType type, int num_samples = 1) { +GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { const bool is_multisampled = num_samples > 1; switch (type) { - case ImageViewType::e1D: + case Shader::TextureType::Color1D: return GL_TEXTURE_1D; - case ImageViewType::e2D: + case Shader::TextureType::Color2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; - case ImageViewType::Cube: + case Shader::TextureType::ColorCube: return GL_TEXTURE_CUBE_MAP; - case ImageViewType::e3D: + case Shader::TextureType::Color3D: return GL_TEXTURE_3D; - case ImageViewType::e1DArray: + case Shader::TextureType::ColorArray1D: return GL_TEXTURE_1D_ARRAY; - case ImageViewType::e2DArray: + case Shader::TextureType::ColorArray2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; - case ImageViewType::CubeArray: + case Shader::TextureType::ColorArrayCube: return GL_TEXTURE_CUBE_MAP_ARRAY; - case ImageViewType::Rect: - return GL_TEXTURE_RECTANGLE; - case ImageViewType::Buffer: + case Shader::TextureType::Buffer: return GL_TEXTURE_BUFFER; } UNREACHABLE_MSG("Invalid image view type={}", type); @@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::arrayflags & VideoCommon::ImageViewFlagBits::Slice)) { - const GLuint texture = image_view->DefaultHandle(); - glNamedFramebufferTexture(fbo, attachment, texture, 0); + glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0); return; } - const GLuint texture = image_view->Handle(ImageViewType::e3D); + const GLuint texture = image_view->Handle(Shader::TextureType::Color3D); if (image_view->range.extent.layers > 1) { // TODO: OpenGL doesn't support rendering to a fixed number of slices glNamedFramebufferTexture(fbo, attachment, texture, 0); @@ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; - for (const FormatTuple& tuple : FORMAT_TABLE) { + for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) { const GLenum format = tuple.internal_format; GLint compat_class; GLint compat_type; @@ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); null_image_3d.Create(GL_TEXTURE_3D); - null_image_rect.Create(GL_TEXTURE_RECTANGLE); glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); - glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); std::array new_handles; glGenTextures(static_cast(new_handles.size()), new_handles.data()); @@ -496,29 +383,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, GL_R8, 0, 1, 0, 6); const std::array texture_handles{ - null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, - null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, - null_image_view_2d_array.handle, null_image_view_cube.handle, + null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle, + null_image_view_cube.handle, }; for (const GLuint handle : texture_handles) { static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); } - const auto set_view = [this](ImageViewType type, GLuint handle) { + const auto set_view = [this](Shader::TextureType type, GLuint handle) { if (device.HasDebuggingToolAttached()) { const std::string name = fmt::format("NullImage {}", type); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } null_image_views[static_cast(type)] = handle; }; - set_view(ImageViewType::e1D, null_image_view_1d.handle); - set_view(ImageViewType::e2D, null_image_view_2d.handle); - set_view(ImageViewType::Cube, null_image_view_cube.handle); - set_view(ImageViewType::e3D, null_image_3d.handle); - set_view(ImageViewType::e1DArray, null_image_1d_array.handle); - set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); - set_view(ImageViewType::CubeArray, null_image_cube_array.handle); - set_view(ImageViewType::Rect, null_image_rect.handle); + set_view(Shader::TextureType::Color1D, null_image_view_1d.handle); + set_view(Shader::TextureType::Color2D, null_image_view_2d.handle); + set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle); + set_view(Shader::TextureType::Color3D, null_image_3d.handle); + set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); + set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); + set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); } TextureCacheRuntime::~TextureCacheRuntime() = default; @@ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; } else { - const auto& tuple = GetFormatTuple(info.format); + const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); gl_internal_format = tuple.internal_format; gl_format = tuple.format; gl_type = tuple.type; @@ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); break; case GL_TEXTURE_BUFFER: - buffer.Create(); - glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); + UNREACHABLE(); break; default: UNREACHABLE_MSG("Invalid target=0x{:x}", target); @@ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map, } } -void Image::UploadMemory(const ImageBufferMap& map, - std::span copies) { - for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, - copy.dst_offset, copy.size); - } -} - void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API @@ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI if (True(image.flags & ImageFlagBits::Converted)) { internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; } else { - internal_format = GetFormatTuple(format).internal_format; + internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; } VideoCommon::SubresourceRange flatten_range = info.range; std::array handles; @@ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI [[fallthrough]]; case ImageViewType::e1D: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); break; case ImageViewType::e2DArray: flatten_range.extent.layers = 1; @@ -985,37 +862,65 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .extent = {.levels = 1, .layers = 1}, }; glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); - break; + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); + } else { + glGenTextures(2, handles.data()); + SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, + info.range); } - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); break; case ImageViewType::e3D: glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); break; case ImageViewType::CubeArray: flatten_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); break; case ImageViewType::Rect: - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + UNIMPLEMENTED(); break; case ImageViewType::Buffer: - glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); - SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + UNREACHABLE(); + break; + } + switch (info.type) { + case ImageViewType::e1D: + default_handle = Handle(Shader::TextureType::Color1D); + break; + case ImageViewType::e1DArray: + default_handle = Handle(Shader::TextureType::ColorArray1D); + break; + case ImageViewType::e2D: + default_handle = Handle(Shader::TextureType::Color2D); + break; + case ImageViewType::e2DArray: + default_handle = Handle(Shader::TextureType::ColorArray2D); + break; + case ImageViewType::e3D: + default_handle = Handle(Shader::TextureType::Color3D); + break; + case ImageViewType::Cube: + default_handle = Handle(Shader::TextureType::ColorCube); + break; + case ImageViewType::CubeArray: + default_handle = Handle(Shader::TextureType::ColorArrayCube); + break; + default: break; } - default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info) : VideoCommon::ImageViewBase{info, view_info} {} @@ -1023,24 +928,18 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} -void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, +void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range) { - if (info.type == ImageViewType::Buffer) { - // TODO: Take offset from buffer cache - glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, - image.guest_size_bytes); - } else { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, - view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if (!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); - } + const GLuint parent = image.texture.handle; + const GLenum target = ImageTarget(view_type, image.info.num_samples); + glTextureView(handle, target, parent, internal_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!info.IsRenderTarget()) { + ApplySwizzle(handle, format, info.Swizzle()); } if (device.HasDebuggingToolAttached()) { - const std::string name = VideoCommon::Name(*this, view_type); + const std::string name = VideoCommon::Name(*this); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } stored_views.emplace_back().handle = handle; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 817b0e650..2e3e02b79 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -9,6 +9,7 @@ #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" @@ -127,13 +128,12 @@ private: OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; - OGLTexture null_image_rect; OGLTextureView null_image_view_1d; OGLTextureView null_image_view_2d; OGLTextureView null_image_view_2d_array; OGLTextureView null_image_view_cube; - std::array null_image_views; + std::array null_image_views{}; }; class Image : public VideoCommon::ImageBase { @@ -154,8 +154,6 @@ public: void UploadMemory(const ImageBufferMap& map, std::span copies); - void UploadMemory(const ImageBufferMap& map, std::span copies); - void DownloadMemory(ImageBufferMap& map, std::span copies); GLuint StorageHandle() noexcept; @@ -170,7 +168,6 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); OGLTexture texture; - OGLBuffer buffer; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; @@ -182,12 +179,14 @@ class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); - [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { - return views[static_cast(query_type)]; + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { + return views[static_cast(handle_type)]; } [[nodiscard]] GLuint DefaultHandle() const noexcept { @@ -198,15 +197,25 @@ public: return internal_format; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: - void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, + void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range); - std::array views{}; + std::array views{}; std::vector stored_views; - GLuint default_handle = 0; GLenum internal_format = GL_NONE; + GLuint default_handle = 0; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index f7ad8f370..672f94bfc 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -5,12 +5,120 @@ #pragma once #include + #include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" namespace OpenGL::MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct FormatTuple { + GLenum internal_format; + GLenum format = GL_NONE; + GLenum type = GL_NONE; +}; + +constexpr std::array FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT +}}; + +inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) { + ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast(pixel_format)]; +} + inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { case Maxwell::VertexAttribute::Type::UnsignedNorm: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c12929de6..4e77ef808 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, std::unique_ptr context_) : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, - program_manager{device}, rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); @@ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() { OGLShader fragment_shader; fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - vertex_program.Create(true, false, vertex_shader.handle); - fragment_program.Create(true, false, fragment_shader.handle); - - pipeline.Create(); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); // Generate presentation sampler present_sampler.Create(); @@ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, - std::data(ortho_matrix)); + program_manager.BindProgram(present_program.handle); + glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; @@ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyClipControl(); state_tracker.NotifyAlphaTest(); - program_manager.BindHostPipeline(pipeline.handle); - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { @@ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - program_manager.RestoreGuestPipeline(); + // TODO + // program_manager.RestoreGuestPipeline(); } void RendererOpenGL::RenderScreenshot() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 0b66f8332..b3ee55665 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -12,7 +12,6 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" namespace Core { @@ -111,9 +110,7 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram vertex_program; - OGLProgram fragment_program; - OGLPipeline pipeline; + OGLProgram present_program; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8fb5be393..51e72b705 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -16,7 +16,6 @@ #include "video_core/host_shaders/opengl_copy_bc4_comp.h" #include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" @@ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindHostCompute(astc_decoder_program.handle); + program_manager.BindProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two images are not implemented"); - program_manager.BindHostCompute(pitch_unswizzle_program.handle); + program_manager.BindProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index feaace0c5..168ffa7e9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,6 +18,9 @@ namespace Vulkan { +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; + ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, @@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, secondary_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - return TextureHandle{lhs_raw | rhs_raw, via_header_index}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers.push_back(sampler->Handle()); } } @@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++texture_buffer_ids; ++index; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9f5d30fe8..e5f54a84f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,7 +19,7 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" -#ifdef _MSC_VER +#if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] #else #define LAMBDA_FORCEINLINE @@ -30,6 +30,7 @@ namespace { using boost::container::small_vector; using boost::container::static_vector; using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; @@ -289,15 +290,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TexturePair(raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; } }}; if constexpr (Spec::has_texture_buffers) { @@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_index++] = sampler->Handle(); } } @@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++index; ++texture_buffer_index; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1334882b5..30b71bdbc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -342,28 +342,15 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { - main_pools.ReleaseContents(); - - std::array graphics_envs; - boost::container::static_vector envs; + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == 0) { - continue; - } - const auto program{static_cast(index)}; - auto& env{graphics_envs[index]}; - const u32 start_address{maxwell3d.regs.shader_config[index].offset}; - env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; - env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; + main_pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; if (pipeline_cache_filename.empty()) { return pipeline; } - serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { boost::container::static_vector env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0f15ad2f7..ef14e91e7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index b8b8eace5..78bf90c48 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() { return MakeShaderInfo(env, *cpu_shader_addr); } +void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array& unique_hashes) { + size_t env_index{}; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < NUM_PROGRAMS; ++index) { + if (unique_hashes[index] == 0) { + continue; + } + const auto program{static_cast(index)}; + auto& env{result.envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + env.SetCachedSize(shader_infos[index]->size_bytes); + result.env_ptrs[env_index++] = &env; + } +} + ShaderInfo* ShaderCache::TryGet(VAddr addr) const { std::scoped_lock lock{lookup_mutex}; diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 89a4bcc84..136fe294c 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -4,14 +4,18 @@ #pragma once +#include +#include #include #include +#include #include #include #include #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +#include "video_core/shader_environment.h" namespace Tegra { class MemoryManager; @@ -30,6 +34,8 @@ class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; + static constexpr size_t NUM_PROGRAMS = 6; + struct Entry { VAddr addr_start; VAddr addr_end; @@ -58,6 +64,15 @@ public: void SyncGuestHost(); protected: + struct GraphicsEnvironments { + std::array envs; + std::array env_ptrs; + + std::span Span() const noexcept { + return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); + } + }; + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_); @@ -65,17 +80,21 @@ protected: /// @brief Update the hashes and information of shader stages /// @param unique_hashes Shader hashes to store into when a stage is enabled /// @return True no success, false on error - bool RefreshStages(std::array& unique_hashes); + bool RefreshStages(std::array& unique_hashes); /// @brief Returns information about the current compute shader /// @return Pointer to a valid shader, nullptr on error const ShaderInfo* ComputeShader(); + /// @brief Collect the current graphics environments + void GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array& unique_hashes); + Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; - std::array shader_infos{}; + std::array shader_infos{}; bool last_shaders_valid = false; private: diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 5dccc0097..c93174519 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -187,8 +187,8 @@ std::optional GenericEnvironment::TryFindSize() { Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, u32 raw) { - const TextureHandle handle{raw, via_header_index}; - const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; + const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); const Shader::TextureType result{ConvertType(entry)}; diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 37d712045..d26dbfaab 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -29,22 +29,6 @@ class Memorymanager; namespace VideoCommon { -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - if (via_header_index) { - image = data; - sampler = data; - } else { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - } - - u32 image; - u32 sampler; -}; - class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index d10ba4ccd..249cc4d0f 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) { return "Invalid"; } -std::string Name(const ImageViewBase& image_view, std::optional type) { +std::string Name(const ImageViewBase& image_view) { const u32 width = image_view.size.width; const u32 height = image_view.size.height; const u32 depth = image_view.size.depth; @@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional t const u32 num_layers = image_view.range.extent.layers; const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; - switch (type.value_or(image_view.type)) { + switch (image_view.type) { case ImageViewType::e1D: return fmt::format("ImageView 1D {}{}", width, level); case ImageViewType::e2D: diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index a48413983..c6cf0583f 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -255,8 +255,7 @@ struct RenderTargets; [[nodiscard]] std::string Name(const ImageBase& image); -[[nodiscard]] std::string Name(const ImageViewBase& image_view, - std::optional type = std::nullopt); +[[nodiscard]] std::string Name(const ImageViewBase& image_view); [[nodiscard]] std::string Name(const RenderTargets& render_targets); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c1d14335e..1a9399455 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -154,6 +154,15 @@ union TextureHandle { }; static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); +[[nodiscard]] inline std::pair TexturePair(u32 raw, bool via_header_index) { + if (via_header_index) { + return {raw, raw}; + } else { + const Tegra::Texture::TextureHandle handle{raw}; + return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id}; + } +} + struct TICEntry { union { struct { diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2318c1bda..e27a2b51e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, .pNext = nullptr, - .storageBuffer16BitAccess = false, + .storageBuffer16BitAccess = true, .uniformAndStorageBuffer16BitAccess = true, .storagePushConstant16 = false, .storageInputOutput16 = false, -- cgit v1.2.3 From bed090807afd3364ed6ef18a031a0ffd95a1b89b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 3 May 2021 20:53:00 -0300 Subject: Move SPIR-V emission functions to their own header --- src/shader_recompiler/CMakeLists.txt | 2 + src/shader_recompiler/backend/bindings.h | 19 + src/shader_recompiler/backend/spirv/emit_context.h | 9 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 571 +------------------- .../backend/spirv/emit_spirv_atomic.cpp | 1 + .../backend/spirv/emit_spirv_barriers.cpp | 1 + .../spirv/emit_spirv_bitwise_conversion.cpp | 1 + .../backend/spirv/emit_spirv_composite.cpp | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 1 + .../backend/spirv/emit_spirv_control_flow.cpp | 1 + .../backend/spirv/emit_spirv_convert.cpp | 1 + .../backend/spirv/emit_spirv_floating_point.cpp | 1 + .../backend/spirv/emit_spirv_image.cpp | 1 + .../backend/spirv/emit_spirv_image_atomic.cpp | 1 + .../backend/spirv/emit_spirv_instructions.h | 583 +++++++++++++++++++++ .../backend/spirv/emit_spirv_integer.cpp | 1 + .../backend/spirv/emit_spirv_logical.cpp | 1 + .../backend/spirv/emit_spirv_memory.cpp | 1 + .../backend/spirv/emit_spirv_select.cpp | 1 + .../backend/spirv/emit_spirv_shared_memory.cpp | 1 + .../backend/spirv/emit_spirv_special.cpp | 1 + .../backend/spirv/emit_spirv_undefined.cpp | 1 + .../backend/spirv/emit_spirv_warp.cpp | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 5 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 8 +- 26 files changed, 637 insertions(+), 579 deletions(-) create mode 100644 src/shader_recompiler/backend/bindings.h create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_instructions.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 0bcd714d6..6523615aa 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(shader_recompiler STATIC + backend/bindings.h backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp @@ -13,6 +14,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_floating_point.cpp backend/spirv/emit_spirv_image.cpp backend/spirv/emit_spirv_image_atomic.cpp + backend/spirv/emit_spirv_instructions.h backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h new file mode 100644 index 000000000..35503000c --- /dev/null +++ b/src/shader_recompiler/backend/bindings.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader::Backend { + +struct Bindings { + u32 unified{}; + u32 uniform_buffer{}; + u32 storage_buffer{}; + u32 texture{}; + u32 image{}; +}; + +} // namespace Shader::Backend diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 30b08104d..8b000f1ec 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -9,6 +9,7 @@ #include +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" #include "shader_recompiler/shader_info.h" @@ -17,14 +18,6 @@ namespace Shader::Backend::SPIRV { using Sirit::Id; -struct Bindings { - u32 unified{}; - u32 uniform_buffer{}; - u32 storage_buffer{}; - u32 texture{}; - u32 image{}; -}; - class VectorTypes { public: void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3f9adc902..0681dfd16 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -9,6 +9,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/program.h" diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 47d62b190..d8ab2d8ed 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -4,9 +4,12 @@ #pragma once +#include + #include #include "common/common_types.h" +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/spirv/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" @@ -16,569 +19,9 @@ namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& binding); -// Microinstruction emitters -Id EmitPhi(EmitContext& ctx, IR::Inst* inst); -void EmitVoid(EmitContext& ctx); -Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranch(EmitContext& ctx, Id label); -void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); -void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); -void EmitSelectionMerge(EmitContext& ctx, Id merge_label); -void EmitReturn(EmitContext& ctx); -void EmitJoin(EmitContext& ctx); -void EmitUnreachable(EmitContext& ctx); -void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); -void EmitBarrier(EmitContext& ctx); -void EmitWorkgroupMemoryBarrier(EmitContext& ctx); -void EmitDeviceMemoryBarrier(EmitContext& ctx); -void EmitPrologue(EmitContext& ctx); -void EmitEpilogue(EmitContext& ctx); -void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); -void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); -void EmitGetRegister(EmitContext& ctx); -void EmitSetRegister(EmitContext& ctx); -void EmitGetPred(EmitContext& ctx); -void EmitSetPred(EmitContext& ctx); -void EmitSetGotoVariable(EmitContext& ctx); -void EmitGetGotoVariable(EmitContext& ctx); -void EmitSetIndirectBranchVariable(EmitContext& ctx); -void EmitGetIndirectBranchVariable(EmitContext& ctx); -Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); -Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); -void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); -Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); -void EmitSetSampleMask(EmitContext& ctx, Id value); -void EmitSetFragDepth(EmitContext& ctx, Id value); -void EmitGetZFlag(EmitContext& ctx); -void EmitGetSFlag(EmitContext& ctx); -void EmitGetCFlag(EmitContext& ctx); -void EmitGetOFlag(EmitContext& ctx); -void EmitSetZFlag(EmitContext& ctx); -void EmitSetSFlag(EmitContext& ctx); -void EmitSetCFlag(EmitContext& ctx); -void EmitSetOFlag(EmitContext& ctx); -Id EmitWorkgroupId(EmitContext& ctx); -Id EmitLocalInvocationId(EmitContext& ctx); -Id EmitInvocationId(EmitContext& ctx); -Id EmitSampleId(EmitContext& ctx); -Id EmitIsHelperInvocation(EmitContext& ctx); -Id EmitYDirection(EmitContext& ctx); -Id EmitLoadLocal(EmitContext& ctx, Id word_offset); -void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); -Id EmitUndefU1(EmitContext& ctx); -Id EmitUndefU8(EmitContext& ctx); -Id EmitUndefU16(EmitContext& ctx); -Id EmitUndefU32(EmitContext& ctx); -Id EmitUndefU64(EmitContext& ctx); -void EmitLoadGlobalU8(EmitContext& ctx); -void EmitLoadGlobalS8(EmitContext& ctx); -void EmitLoadGlobalU16(EmitContext& ctx); -void EmitLoadGlobalS16(EmitContext& ctx); -Id EmitLoadGlobal32(EmitContext& ctx, Id address); -Id EmitLoadGlobal64(EmitContext& ctx, Id address); -Id EmitLoadGlobal128(EmitContext& ctx, Id address); -void EmitWriteGlobalU8(EmitContext& ctx); -void EmitWriteGlobalS8(EmitContext& ctx); -void EmitWriteGlobalU16(EmitContext& ctx); -void EmitWriteGlobalS16(EmitContext& ctx); -void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); -void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); -void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); -Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitLoadSharedU8(EmitContext& ctx, Id offset); -Id EmitLoadSharedS8(EmitContext& ctx, Id offset); -Id EmitLoadSharedU16(EmitContext& ctx, Id offset); -Id EmitLoadSharedS16(EmitContext& ctx, Id offset); -Id EmitLoadSharedU32(EmitContext& ctx, Id offset); -Id EmitLoadSharedU64(EmitContext& ctx, Id offset); -Id EmitLoadSharedU128(EmitContext& ctx, Id offset); -void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); -void EmitCompositeConstructF64x2(EmitContext& ctx); -void EmitCompositeConstructF64x3(EmitContext& ctx); -void EmitCompositeConstructF64x4(EmitContext& ctx); -void EmitCompositeExtractF64x2(EmitContext& ctx); -void EmitCompositeExtractF64x3(EmitContext& ctx); -void EmitCompositeExtractF64x4(EmitContext& ctx); -Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -void EmitBitCastU16F16(EmitContext& ctx); -Id EmitBitCastU32F32(EmitContext& ctx, Id value); -void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastF16U16(EmitContext& ctx); -Id EmitBitCastF32U32(EmitContext& ctx, Id value); -void EmitBitCastF64U64(EmitContext& ctx); -Id EmitPackUint2x32(EmitContext& ctx, Id value); -Id EmitUnpackUint2x32(EmitContext& ctx, Id value); -Id EmitPackFloat2x16(EmitContext& ctx, Id value); -Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); -Id EmitPackHalf2x16(EmitContext& ctx, Id value); -Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); -Id EmitPackDouble2x32(EmitContext& ctx, Id value); -Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); -void EmitGetZeroFromOp(EmitContext& ctx); -void EmitGetSignFromOp(EmitContext& ctx); -void EmitGetCarryFromOp(EmitContext& ctx); -void EmitGetOverflowFromOp(EmitContext& ctx); -void EmitGetSparseFromOp(EmitContext& ctx); -void EmitGetInBoundsFromOp(EmitContext& ctx); -Id EmitFPAbs16(EmitContext& ctx, Id value); -Id EmitFPAbs32(EmitContext& ctx, Id value); -Id EmitFPAbs64(EmitContext& ctx, Id value); -Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPMax32(EmitContext& ctx, Id a, Id b); -Id EmitFPMax64(EmitContext& ctx, Id a, Id b); -Id EmitFPMin32(EmitContext& ctx, Id a, Id b); -Id EmitFPMin64(EmitContext& ctx, Id a, Id b); -Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPNeg16(EmitContext& ctx, Id value); -Id EmitFPNeg32(EmitContext& ctx, Id value); -Id EmitFPNeg64(EmitContext& ctx, Id value); -Id EmitFPSin(EmitContext& ctx, Id value); -Id EmitFPCos(EmitContext& ctx, Id value); -Id EmitFPExp2(EmitContext& ctx, Id value); -Id EmitFPLog2(EmitContext& ctx, Id value); -Id EmitFPRecip32(EmitContext& ctx, Id value); -Id EmitFPRecip64(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); -Id EmitFPSqrt(EmitContext& ctx, Id value); -Id EmitFPSaturate16(EmitContext& ctx, Id value); -Id EmitFPSaturate32(EmitContext& ctx, Id value); -Id EmitFPSaturate64(EmitContext& ctx, Id value); -Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPRoundEven16(EmitContext& ctx, Id value); -Id EmitFPRoundEven32(EmitContext& ctx, Id value); -Id EmitFPRoundEven64(EmitContext& ctx, Id value); -Id EmitFPFloor16(EmitContext& ctx, Id value); -Id EmitFPFloor32(EmitContext& ctx, Id value); -Id EmitFPFloor64(EmitContext& ctx, Id value); -Id EmitFPCeil16(EmitContext& ctx, Id value); -Id EmitFPCeil32(EmitContext& ctx, Id value); -Id EmitFPCeil64(EmitContext& ctx, Id value); -Id EmitFPTrunc16(EmitContext& ctx, Id value); -Id EmitFPTrunc32(EmitContext& ctx, Id value); -Id EmitFPTrunc64(EmitContext& ctx, Id value); -Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPIsNan16(EmitContext& ctx, Id value); -Id EmitFPIsNan32(EmitContext& ctx, Id value); -Id EmitFPIsNan64(EmitContext& ctx, Id value); -Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitIAdd64(EmitContext& ctx, Id a, Id b); -Id EmitISub32(EmitContext& ctx, Id a, Id b); -Id EmitISub64(EmitContext& ctx, Id a, Id b); -Id EmitIMul32(EmitContext& ctx, Id a, Id b); -Id EmitINeg32(EmitContext& ctx, Id value); -Id EmitINeg64(EmitContext& ctx, Id value); -Id EmitIAbs32(EmitContext& ctx, Id value); -Id EmitIAbs64(EmitContext& ctx, Id value); -Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); -Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); -Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitReverse32(EmitContext& ctx, Id value); -Id EmitBitCount32(EmitContext& ctx, Id value); -Id EmitBitwiseNot32(EmitContext& ctx, Id value); -Id EmitFindSMsb32(EmitContext& ctx, Id value); -Id EmitFindUMsb32(EmitContext& ctx, Id value); -Id EmitSMin32(EmitContext& ctx, Id a, Id b); -Id EmitUMin32(EmitContext& ctx, Id a, Id b); -Id EmitSMax32(EmitContext& ctx, Id a, Id b); -Id EmitUMax32(EmitContext& ctx, Id a, Id b); -Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitGlobalAtomicIAdd32(EmitContext& ctx); -Id EmitGlobalAtomicSMin32(EmitContext& ctx); -Id EmitGlobalAtomicUMin32(EmitContext& ctx); -Id EmitGlobalAtomicSMax32(EmitContext& ctx); -Id EmitGlobalAtomicUMax32(EmitContext& ctx); -Id EmitGlobalAtomicInc32(EmitContext& ctx); -Id EmitGlobalAtomicDec32(EmitContext& ctx); -Id EmitGlobalAtomicAnd32(EmitContext& ctx); -Id EmitGlobalAtomicOr32(EmitContext& ctx); -Id EmitGlobalAtomicXor32(EmitContext& ctx); -Id EmitGlobalAtomicExchange32(EmitContext& ctx); -Id EmitGlobalAtomicIAdd64(EmitContext& ctx); -Id EmitGlobalAtomicSMin64(EmitContext& ctx); -Id EmitGlobalAtomicUMin64(EmitContext& ctx); -Id EmitGlobalAtomicSMax64(EmitContext& ctx); -Id EmitGlobalAtomicUMax64(EmitContext& ctx); -Id EmitGlobalAtomicInc64(EmitContext& ctx); -Id EmitGlobalAtomicDec64(EmitContext& ctx); -Id EmitGlobalAtomicAnd64(EmitContext& ctx); -Id EmitGlobalAtomicOr64(EmitContext& ctx); -Id EmitGlobalAtomicXor64(EmitContext& ctx); -Id EmitGlobalAtomicExchange64(EmitContext& ctx); -Id EmitGlobalAtomicAddF32(EmitContext& ctx); -Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); -Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); -Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); -Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); -Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); -Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); -Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); -Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); -Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); -Id EmitLogicalNot(EmitContext& ctx, Id value); -Id EmitConvertS16F16(EmitContext& ctx, Id value); -Id EmitConvertS16F32(EmitContext& ctx, Id value); -Id EmitConvertS16F64(EmitContext& ctx, Id value); -Id EmitConvertS32F16(EmitContext& ctx, Id value); -Id EmitConvertS32F32(EmitContext& ctx, Id value); -Id EmitConvertS32F64(EmitContext& ctx, Id value); -Id EmitConvertS64F16(EmitContext& ctx, Id value); -Id EmitConvertS64F32(EmitContext& ctx, Id value); -Id EmitConvertS64F64(EmitContext& ctx, Id value); -Id EmitConvertU16F16(EmitContext& ctx, Id value); -Id EmitConvertU16F32(EmitContext& ctx, Id value); -Id EmitConvertU16F64(EmitContext& ctx, Id value); -Id EmitConvertU32F16(EmitContext& ctx, Id value); -Id EmitConvertU32F32(EmitContext& ctx, Id value); -Id EmitConvertU32F64(EmitContext& ctx, Id value); -Id EmitConvertU64F16(EmitContext& ctx, Id value); -Id EmitConvertU64F32(EmitContext& ctx, Id value); -Id EmitConvertU64F64(EmitContext& ctx, Id value); -Id EmitConvertU64U32(EmitContext& ctx, Id value); -Id EmitConvertU32U64(EmitContext& ctx, Id value); -Id EmitConvertF16F32(EmitContext& ctx, Id value); -Id EmitConvertF32F16(EmitContext& ctx, Id value); -Id EmitConvertF32F64(EmitContext& ctx, Id value); -Id EmitConvertF64F32(EmitContext& ctx, Id value); -Id EmitConvertF16S8(EmitContext& ctx, Id value); -Id EmitConvertF16S16(EmitContext& ctx, Id value); -Id EmitConvertF16S32(EmitContext& ctx, Id value); -Id EmitConvertF16S64(EmitContext& ctx, Id value); -Id EmitConvertF16U8(EmitContext& ctx, Id value); -Id EmitConvertF16U16(EmitContext& ctx, Id value); -Id EmitConvertF16U32(EmitContext& ctx, Id value); -Id EmitConvertF16U64(EmitContext& ctx, Id value); -Id EmitConvertF32S8(EmitContext& ctx, Id value); -Id EmitConvertF32S16(EmitContext& ctx, Id value); -Id EmitConvertF32S32(EmitContext& ctx, Id value); -Id EmitConvertF32S64(EmitContext& ctx, Id value); -Id EmitConvertF32U8(EmitContext& ctx, Id value); -Id EmitConvertF32U16(EmitContext& ctx, Id value); -Id EmitConvertF32U32(EmitContext& ctx, Id value); -Id EmitConvertF32U64(EmitContext& ctx, Id value); -Id EmitConvertF64S8(EmitContext& ctx, Id value); -Id EmitConvertF64S16(EmitContext& ctx, Id value); -Id EmitConvertF64S32(EmitContext& ctx, Id value); -Id EmitConvertF64S64(EmitContext& ctx, Id value); -Id EmitConvertF64U8(EmitContext& ctx, Id value); -Id EmitConvertF64U16(EmitContext& ctx, Id value); -Id EmitConvertF64U32(EmitContext& ctx, Id value); -Id EmitConvertF64U64(EmitContext& ctx, Id value); -Id EmitBindlessImageSampleImplicitLod(EmitContext&); -Id EmitBindlessImageSampleExplicitLod(EmitContext&); -Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); -Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); -Id EmitBindlessImageGather(EmitContext&); -Id EmitBindlessImageGatherDref(EmitContext&); -Id EmitBindlessImageFetch(EmitContext&); -Id EmitBindlessImageQueryDimensions(EmitContext&); -Id EmitBindlessImageQueryLod(EmitContext&); -Id EmitBindlessImageGradient(EmitContext&); -Id EmitBindlessImageRead(EmitContext&); -Id EmitBindlessImageWrite(EmitContext&); -Id EmitBoundImageSampleImplicitLod(EmitContext&); -Id EmitBoundImageSampleExplicitLod(EmitContext&); -Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); -Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); -Id EmitBoundImageGather(EmitContext&); -Id EmitBoundImageGatherDref(EmitContext&); -Id EmitBoundImageFetch(EmitContext&); -Id EmitBoundImageQueryDimensions(EmitContext&); -Id EmitBoundImageQueryLod(EmitContext&); -Id EmitBoundImageGradient(EmitContext&); -Id EmitBoundImageRead(EmitContext&); -Id EmitBoundImageWrite(EmitContext&); -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id bias_lc, const IR::Value& offset); -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id lod_lc, const IR::Value& offset); -Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id bias_lc, const IR::Value& offset); -Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id lod_lc, const IR::Value& offset); -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - const IR::Value& offset, const IR::Value& offset2); -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - const IR::Value& offset, const IR::Value& offset2, Id dref); -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, - Id lod, Id ms); -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); -Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id derivates, Id offset, Id lod_clamp); -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); -Id EmitBindlessImageAtomicIAdd32(EmitContext&); -Id EmitBindlessImageAtomicSMin32(EmitContext&); -Id EmitBindlessImageAtomicUMin32(EmitContext&); -Id EmitBindlessImageAtomicSMax32(EmitContext&); -Id EmitBindlessImageAtomicUMax32(EmitContext&); -Id EmitBindlessImageAtomicInc32(EmitContext&); -Id EmitBindlessImageAtomicDec32(EmitContext&); -Id EmitBindlessImageAtomicAnd32(EmitContext&); -Id EmitBindlessImageAtomicOr32(EmitContext&); -Id EmitBindlessImageAtomicXor32(EmitContext&); -Id EmitBindlessImageAtomicExchange32(EmitContext&); -Id EmitBoundImageAtomicIAdd32(EmitContext&); -Id EmitBoundImageAtomicSMin32(EmitContext&); -Id EmitBoundImageAtomicUMin32(EmitContext&); -Id EmitBoundImageAtomicSMax32(EmitContext&); -Id EmitBoundImageAtomicUMax32(EmitContext&); -Id EmitBoundImageAtomicInc32(EmitContext&); -Id EmitBoundImageAtomicDec32(EmitContext&); -Id EmitBoundImageAtomicAnd32(EmitContext&); -Id EmitBoundImageAtomicOr32(EmitContext&); -Id EmitBoundImageAtomicXor32(EmitContext&); -Id EmitBoundImageAtomicExchange32(EmitContext&); -Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitLaneId(EmitContext& ctx); -Id EmitVoteAll(EmitContext& ctx, Id pred); -Id EmitVoteAny(EmitContext& ctx, Id pred); -Id EmitVoteEqual(EmitContext& ctx, Id pred); -Id EmitSubgroupBallot(EmitContext& ctx, Id pred); -Id EmitSubgroupEqMask(EmitContext& ctx); -Id EmitSubgroupLtMask(EmitContext& ctx); -Id EmitSubgroupLeMask(EmitContext& ctx); -Id EmitSubgroupGtMask(EmitContext& ctx); -Id EmitSubgroupGeMask(EmitContext& ctx); -Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); - -Id EmitDPdxFine(EmitContext& ctx, Id op_a); - -Id EmitDPdyFine(EmitContext& ctx, Id op_a); - -Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); - -Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); +[[nodiscard]] inline std::vector EmitSPIRV(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitSPIRV(profile, program, binding); +} } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 6e17d1c7e..053800eb7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 705aebd81..e0b52a001 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 93a45d834..bb11f4f4e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 079e226de..10ff4ecab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index ef32184ea..8e57ff070 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,6 +6,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index b4a6fbb93..6154c46be 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index acb8957fe..fd74e475f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index b3afbef25..61cf25f9c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 6680cf1b3..5832104df 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp index 05bed22b9..d7f1a365a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h new file mode 100644 index 000000000..b5eec3cd1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -0,0 +1,583 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/common_types.h" + +namespace IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace IR + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class EmitContext; + +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitBranch(EmitContext& ctx, Id label); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label); +void EmitReturn(EmitContext& ctx); +void EmitJoin(EmitContext& ctx); +void EmitUnreachable(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetSampleMask(EmitContext& ctx, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitInvocationId(EmitContext& ctx); +Id EmitSampleId(EmitContext& ctx); +Id EmitIsHelperInvocation(EmitContext& ctx); +Id EmitYDirection(EmitContext& ctx); +Id EmitLoadLocal(EmitContext& ctx, Id word_offset); +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +Id EmitLoadGlobal32(EmitContext& ctx, Id address); +Id EmitLoadGlobal64(EmitContext& ctx, Id address); +Id EmitLoadGlobal128(EmitContext& ctx, Id address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitLoadSharedU8(EmitContext& ctx, Id offset); +Id EmitLoadSharedS8(EmitContext& ctx, Id offset); +Id EmitLoadSharedU16(EmitContext& ctx, Id offset); +Id EmitLoadSharedS16(EmitContext& ctx, Id offset); +Id EmitLoadSharedU32(EmitContext& ctx, Id offset); +Id EmitLoadSharedU64(EmitContext& ctx, Id offset); +Id EmitLoadSharedU128(EmitContext& ctx, Id offset); +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& ctx); +Id EmitPackUint2x32(EmitContext& ctx, Id value); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value); +Id EmitPackFloat2x16(EmitContext& ctx, Id value); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); +Id EmitPackHalf2x16(EmitContext& ctx, Id value); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value); +Id EmitFPSin(EmitContext& ctx, Id value); +Id EmitFPCos(EmitContext& ctx, Id value); +Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPLog2(EmitContext& ctx, Id value); +Id EmitFPRecip32(EmitContext& ctx, Id value); +Id EmitFPRecip64(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); +Id EmitFPSqrt(EmitContext& ctx, Id value); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value); +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPRoundEven16(EmitContext& ctx, Id value); +Id EmitFPRoundEven32(EmitContext& ctx, Id value); +Id EmitFPRoundEven64(EmitContext& ctx, Id value); +Id EmitFPFloor16(EmitContext& ctx, Id value); +Id EmitFPFloor32(EmitContext& ctx, Id value); +Id EmitFPFloor64(EmitContext& ctx, Id value); +Id EmitFPCeil16(EmitContext& ctx, Id value); +Id EmitFPCeil32(EmitContext& ctx, Id value); +Id EmitFPCeil64(EmitContext& ctx, Id value); +Id EmitFPTrunc16(EmitContext& ctx, Id value); +Id EmitFPTrunc32(EmitContext& ctx, Id value); +Id EmitFPTrunc64(EmitContext& ctx, Id value); +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan16(EmitContext& ctx, Id value); +Id EmitFPIsNan32(EmitContext& ctx, Id value); +Id EmitFPIsNan64(EmitContext& ctx, Id value); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +Id EmitISub64(EmitContext& ctx, Id a, Id b); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitINeg32(EmitContext& ctx, Id value); +Id EmitINeg64(EmitContext& ctx, Id value); +Id EmitIAbs32(EmitContext& ctx, Id value); +Id EmitIAbs64(EmitContext& ctx, Id value); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitReverse32(EmitContext& ctx, Id value); +Id EmitBitCount32(EmitContext& ctx, Id value); +Id EmitBitwiseNot32(EmitContext& ctx, Id value); +Id EmitFindSMsb32(EmitContext& ctx, Id value); +Id EmitFindUMsb32(EmitContext& ctx, Id value); +Id EmitSMin32(EmitContext& ctx, Id a, Id b); +Id EmitUMin32(EmitContext& ctx, Id a, Id b); +Id EmitSMax32(EmitContext& ctx, Id a, Id b); +Id EmitUMax32(EmitContext& ctx, Id a, Id b); +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitGlobalAtomicIAdd32(EmitContext& ctx); +Id EmitGlobalAtomicSMin32(EmitContext& ctx); +Id EmitGlobalAtomicUMin32(EmitContext& ctx); +Id EmitGlobalAtomicSMax32(EmitContext& ctx); +Id EmitGlobalAtomicUMax32(EmitContext& ctx); +Id EmitGlobalAtomicInc32(EmitContext& ctx); +Id EmitGlobalAtomicDec32(EmitContext& ctx); +Id EmitGlobalAtomicAnd32(EmitContext& ctx); +Id EmitGlobalAtomicOr32(EmitContext& ctx); +Id EmitGlobalAtomicXor32(EmitContext& ctx); +Id EmitGlobalAtomicExchange32(EmitContext& ctx); +Id EmitGlobalAtomicIAdd64(EmitContext& ctx); +Id EmitGlobalAtomicSMin64(EmitContext& ctx); +Id EmitGlobalAtomicUMin64(EmitContext& ctx); +Id EmitGlobalAtomicSMax64(EmitContext& ctx); +Id EmitGlobalAtomicUMax64(EmitContext& ctx); +Id EmitGlobalAtomicInc64(EmitContext& ctx); +Id EmitGlobalAtomicDec64(EmitContext& ctx); +Id EmitGlobalAtomicAnd64(EmitContext& ctx); +Id EmitGlobalAtomicOr64(EmitContext& ctx); +Id EmitGlobalAtomicXor64(EmitContext& ctx); +Id EmitGlobalAtomicExchange64(EmitContext& ctx); +Id EmitGlobalAtomicAddF32(EmitContext& ctx); +Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); +Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); +Id EmitLogicalNot(EmitContext& ctx, Id value); +Id EmitConvertS16F16(EmitContext& ctx, Id value); +Id EmitConvertS16F32(EmitContext& ctx, Id value); +Id EmitConvertS16F64(EmitContext& ctx, Id value); +Id EmitConvertS32F16(EmitContext& ctx, Id value); +Id EmitConvertS32F32(EmitContext& ctx, Id value); +Id EmitConvertS32F64(EmitContext& ctx, Id value); +Id EmitConvertS64F16(EmitContext& ctx, Id value); +Id EmitConvertS64F32(EmitContext& ctx, Id value); +Id EmitConvertS64F64(EmitContext& ctx, Id value); +Id EmitConvertU16F16(EmitContext& ctx, Id value); +Id EmitConvertU16F32(EmitContext& ctx, Id value); +Id EmitConvertU16F64(EmitContext& ctx, Id value); +Id EmitConvertU32F16(EmitContext& ctx, Id value); +Id EmitConvertU32F32(EmitContext& ctx, Id value); +Id EmitConvertU32F64(EmitContext& ctx, Id value); +Id EmitConvertU64F16(EmitContext& ctx, Id value); +Id EmitConvertU64F32(EmitContext& ctx, Id value); +Id EmitConvertU64F64(EmitContext& ctx, Id value); +Id EmitConvertU64U32(EmitContext& ctx, Id value); +Id EmitConvertU32U64(EmitContext& ctx, Id value); +Id EmitConvertF16F32(EmitContext& ctx, Id value); +Id EmitConvertF32F16(EmitContext& ctx, Id value); +Id EmitConvertF32F64(EmitContext& ctx, Id value); +Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S8(EmitContext& ctx, Id value); +Id EmitConvertF16S16(EmitContext& ctx, Id value); +Id EmitConvertF16S32(EmitContext& ctx, Id value); +Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U8(EmitContext& ctx, Id value); +Id EmitConvertF16U16(EmitContext& ctx, Id value); +Id EmitConvertF16U32(EmitContext& ctx, Id value); +Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S8(EmitContext& ctx, Id value); +Id EmitConvertF32S16(EmitContext& ctx, Id value); +Id EmitConvertF32S32(EmitContext& ctx, Id value); +Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U8(EmitContext& ctx, Id value); +Id EmitConvertF32U16(EmitContext& ctx, Id value); +Id EmitConvertF32U32(EmitContext& ctx, Id value); +Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S8(EmitContext& ctx, Id value); +Id EmitConvertF64S16(EmitContext& ctx, Id value); +Id EmitConvertF64S32(EmitContext& ctx, Id value); +Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U8(EmitContext& ctx, Id value); +Id EmitConvertF64U16(EmitContext& ctx, Id value); +Id EmitConvertF64U32(EmitContext& ctx, Id value); +Id EmitConvertF64U64(EmitContext& ctx, Id value); +Id EmitBindlessImageSampleImplicitLod(EmitContext&); +Id EmitBindlessImageSampleExplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +Id EmitBindlessImageGather(EmitContext&); +Id EmitBindlessImageGatherDref(EmitContext&); +Id EmitBindlessImageFetch(EmitContext&); +Id EmitBindlessImageQueryDimensions(EmitContext&); +Id EmitBindlessImageQueryLod(EmitContext&); +Id EmitBindlessImageGradient(EmitContext&); +Id EmitBindlessImageRead(EmitContext&); +Id EmitBindlessImageWrite(EmitContext&); +Id EmitBoundImageSampleImplicitLod(EmitContext&); +Id EmitBoundImageSampleExplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); +Id EmitBoundImageGather(EmitContext&); +Id EmitBoundImageGatherDref(EmitContext&); +Id EmitBoundImageFetch(EmitContext&); +Id EmitBoundImageQueryDimensions(EmitContext&); +Id EmitBoundImageQueryLod(EmitContext&); +Id EmitBoundImageGradient(EmitContext&); +Id EmitBoundImageRead(EmitContext&); +Id EmitBoundImageWrite(EmitContext&); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod_lc, const IR::Value& offset); +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const IR::Value& offset); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod_lc, const IR::Value& offset); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2, Id dref); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id lod, Id ms); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); +Id EmitBindlessImageAtomicIAdd32(EmitContext&); +Id EmitBindlessImageAtomicSMin32(EmitContext&); +Id EmitBindlessImageAtomicUMin32(EmitContext&); +Id EmitBindlessImageAtomicSMax32(EmitContext&); +Id EmitBindlessImageAtomicUMax32(EmitContext&); +Id EmitBindlessImageAtomicInc32(EmitContext&); +Id EmitBindlessImageAtomicDec32(EmitContext&); +Id EmitBindlessImageAtomicAnd32(EmitContext&); +Id EmitBindlessImageAtomicOr32(EmitContext&); +Id EmitBindlessImageAtomicXor32(EmitContext&); +Id EmitBindlessImageAtomicExchange32(EmitContext&); +Id EmitBoundImageAtomicIAdd32(EmitContext&); +Id EmitBoundImageAtomicSMin32(EmitContext&); +Id EmitBoundImageAtomicUMin32(EmitContext&); +Id EmitBoundImageAtomicSMax32(EmitContext&); +Id EmitBoundImageAtomicUMax32(EmitContext&); +Id EmitBoundImageAtomicInc32(EmitContext&); +Id EmitBoundImageAtomicDec32(EmitContext&); +Id EmitBoundImageAtomicAnd32(EmitContext&); +Id EmitBoundImageAtomicOr32(EmitContext&); +Id EmitBoundImageAtomicXor32(EmitContext&); +Id EmitBoundImageAtomicExchange32(EmitContext&); +Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitLaneId(EmitContext& ctx); +Id EmitVoteAll(EmitContext& ctx, Id pred); +Id EmitVoteAny(EmitContext& ctx, Id pred); +Id EmitVoteEqual(EmitContext& ctx, Id pred); +Id EmitSubgroupBallot(EmitContext& ctx, Id pred); +Id EmitSubgroupEqMask(EmitContext& ctx); +Id EmitSubgroupLtMask(EmitContext& ctx); +Id EmitSubgroupLeMask(EmitContext& ctx); +Id EmitSubgroupGtMask(EmitContext& ctx); +Id EmitSubgroupGeMask(EmitContext& ctx); +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); +Id EmitDPdxFine(EmitContext& ctx, Id op_a); +Id EmitDPdyFine(EmitContext& ctx, Id op_a); +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 86e6a4f3b..06ab23b1d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index bb434def2..b9a9500fc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index a6a3f3351..37a66095f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 0b45db45e..c5b4f4720 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index 710d1cd25..9a79fc7a2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index d5430e905..ba948f3c9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index 19b06dbe4..c9f469e90 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 239e2ecab..78b1e1ba7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c9ca1f005..6585817bc 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -254,7 +254,7 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( OGLProgram gl_program; gl_program.handle = glCreateProgram(); - Shader::Backend::SPIRV::Bindings binding; + Shader::Backend::Bindings binding; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -297,8 +297,7 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - Shader::Backend::SPIRV::Bindings binding; - const std::vector code{EmitSPIRV(profile, program, binding)}; + const std::vector code{EmitSPIRV(profile, program)}; OGLProgram gl_program; gl_program.handle = glCreateProgram(); AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30b71bdbc..a5edcd072 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,8 +315,9 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; - Shader::Backend::SPIRV::Bindings binding; - for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { + Shader::Backend::Bindings binding; + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -388,8 +389,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - Shader::Backend::SPIRV::Bindings binding; - const std::vector code{EmitSPIRV(base_profile, program, binding)}; + const std::vector code{EmitSPIRV(base_profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { -- cgit v1.2.3 From bfa47539f6d5779a80d6fb23ae49c1d34e01ae93 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 5 May 2021 01:08:16 -0300 Subject: gl_shader_cache: Remove code unintentionally committed --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6585817bc..9bbdfeb62 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -266,9 +266,6 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( infos[stage_index] = &program.info; const std::vector code{EmitSPIRV(profile, program, binding)}; - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); AddShader(Stage(stage_index), gl_program.handle, code); } LinkProgram(gl_program.handle); -- cgit v1.2.3 From a51503660435f1279ce0fa449f9cf76e74b45d74 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 00:29:08 -0300 Subject: vk_master_semaphore: Use fetch_add to increase master semaphore tick --- src/video_core/renderer_vulkan/vk_master_semaphore.h | 6 +++--- src/video_core/renderer_vulkan/vk_scheduler.cpp | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index ee3cd35d0..4f8688118 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -39,9 +39,9 @@ public: return KnownGpuTick() >= tick; } - /// Advance to the logical tick. - void NextTick() noexcept { - ++current_tick; + /// Advance to the logical tick and return the old one + [[nodiscard]] u64 NextTick() noexcept { + return current_tick.fetch_add(1, std::memory_order::relaxed); } /// Refresh the known GPU tick diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 81cb330d9..fcb6a5911 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -168,9 +168,7 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { EndPendingOperations(); InvalidateState(); - const u64 signal_value = master_semaphore->CurrentTick(); - master_semaphore->NextTick(); - + const u64 signal_value = master_semaphore->NextTick(); Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { cmdbuf.End(); -- cgit v1.2.3 From 56c47951c5d92d5e6145060469528301c67e0754 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 00:29:37 -0300 Subject: vk_query_cache: Wait before reading queries --- src/video_core/renderer_vulkan/vk_query_cache.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 1dd78328c..c9cb32d71 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -114,17 +114,10 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - auto& scheduler{cache.GetScheduler()}; - if (tick >= scheduler.CurrentTick()) { - scheduler.Flush(); - // This may not be necessary, but it's better to play it safe and assume drivers don't - // support wait before signal on vkGetQueryPoolResults - scheduler.WaitWorker(); - } + cache.GetScheduler().Wait(tick); u64 data; const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( - query.first, query.second, 1, sizeof(data), &data, sizeof(data), - VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT); switch (query_result) { case VK_SUCCESS: -- cgit v1.2.3 From 36f158626726f940d9dba22a2b03ebbb5aa41c5e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 7 May 2021 06:26:12 -0300 Subject: vk_scheduler: Use locks instead of SPSC a queue This tries to fix a data race where we'd wait forever for the GPU. --- src/video_core/renderer_vulkan/vk_scheduler.cpp | 58 ++++++++++++++----------- src/video_core/renderer_vulkan/vk_scheduler.h | 16 ++++--- 2 files changed, 42 insertions(+), 32 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index fcb6a5911..4840962de 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -47,8 +47,11 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) } VKScheduler::~VKScheduler() { - quit = true; - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + quit = true; + } + work_cv.notify_all(); worker_thread.join(); } @@ -69,20 +72,19 @@ void VKScheduler::WaitWorker() { MICROPROFILE_SCOPE(Vulkan_WaitForWorker); DispatchWork(); - bool finished = false; - do { - cv.notify_all(); - std::unique_lock lock{mutex}; - finished = chunk_queue.Empty(); - } while (!finished); + std::unique_lock lock{work_mutex}; + wait_cv.wait(lock, [this] { return work_queue.empty(); }); } void VKScheduler::DispatchWork() { if (chunk->Empty()) { return; } - chunk_queue.Push(std::move(chunk)); - cv.notify_all(); + { + std::lock_guard lock{work_mutex}; + work_queue.push(std::move(chunk)); + } + work_cv.notify_one(); AcquireNewChunk(); } @@ -135,22 +137,27 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { void VKScheduler::WorkerThread() { Common::SetCurrentThreadName("yuzu:VulkanWorker"); - std::unique_lock lock{mutex}; do { - cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); - if (quit) { - continue; + if (work_queue.empty()) { + wait_cv.notify_all(); } - while (!chunk_queue.Empty()) { - auto extracted_chunk = std::move(chunk_queue.Front()); - chunk_queue.Pop(); - const bool has_submit = extracted_chunk->HasSubmit(); - extracted_chunk->ExecuteAll(current_cmdbuf); - if (has_submit) { - AllocateWorkerCommandBuffer(); + std::unique_ptr work; + { + std::unique_lock lock{work_mutex}; + work_cv.wait(lock, [this] { return !work_queue.empty() || quit; }); + if (quit) { + continue; } - chunk_reserve.Push(std::move(extracted_chunk)); + work = std::move(work_queue.front()); + work_queue.pop(); + } + const bool has_submit = work->HasSubmit(); + work->ExecuteAll(current_cmdbuf); + if (has_submit) { + AllocateWorkerCommandBuffer(); } + std::lock_guard reserve_lock{reserve_mutex}; + chunk_reserve.push_back(std::move(work)); } while (!quit); } @@ -269,12 +276,13 @@ void VKScheduler::EndRenderPass() { } void VKScheduler::AcquireNewChunk() { - if (chunk_reserve.Empty()) { + std::lock_guard lock{reserve_mutex}; + if (chunk_reserve.empty()) { chunk = std::make_unique(); return; } - chunk = std::move(chunk_reserve.Front()); - chunk_reserve.Pop(); + chunk = std::move(chunk_reserve.back()); + chunk_reserve.pop_back(); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 40215c4c5..6600fb142 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -6,14 +6,14 @@ #include #include +#include #include #include -#include #include #include + #include "common/alignment.h" #include "common/common_types.h" -#include "common/threadsafe_queue.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -220,11 +220,13 @@ private: std::array renderpass_images{}; std::array renderpass_image_ranges{}; - Common::SPSCQueue> chunk_queue; - Common::SPSCQueue> chunk_reserve; - std::mutex mutex; - std::condition_variable cv; - bool quit = false; + std::queue> work_queue; + std::vector> chunk_reserve; + std::mutex reserve_mutex; + std::mutex work_mutex; + std::condition_variable work_cv; + std::condition_variable wait_cv; + std::atomic_bool quit{}; }; } // namespace Vulkan -- cgit v1.2.3 From 2c81ad831192a8234e26a61706f18b460999c89f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 8 May 2021 16:34:41 -0300 Subject: glasm: Initial GLASM compute implementation for testing --- .../renderer_opengl/gl_compute_program.cpp | 17 +++++++--- .../renderer_opengl/gl_compute_program.h | 7 ++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 37 ++++++++++++++++++---- 3 files changed, 47 insertions(+), 14 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp index d5ef65439..fb54618a4 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -29,11 +29,11 @@ bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, OGLProgram program_, - const Shader::Info& info_) + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - kepler_compute{kepler_compute_}, - program_manager{program_manager_}, program{std::move(program_)}, info{info_} { + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, + source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { for (const auto& desc : info.texture_buffer_descriptors) { num_texture_buffers += desc.count; } @@ -124,6 +124,14 @@ void ComputeProgram::Configure() { const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); + if (assembly_program.handle != 0) { + // FIXME: State track this + glEnable(GL_COMPUTE_PROGRAM_NV); + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, assembly_program.handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(source_program.handle); + } buffer_cache.UnbindComputeTextureBuffers(); size_t texbuf_index{}; const auto add_buffer{[&](const auto& desc) { @@ -172,7 +180,6 @@ void ComputeProgram::Configure() { if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); } - program_manager.BindProgram(program.handle); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h index 64a75d44d..ddb00dc1d 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.h +++ b/src/video_core/renderer_opengl/gl_compute_program.h @@ -52,8 +52,8 @@ public: explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, OGLProgram program_, - const Shader::Info& info_); + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_); void Configure(); @@ -64,8 +64,9 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; ProgramManager& program_manager; - OGLProgram program; Shader::Info info; + OGLProgram source_program; + OGLAssemblyProgram assembly_program; u32 num_texture_buffers{}; u32 num_image_buffers{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9bbdfeb62..d9f0bca78 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -16,6 +16,7 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -89,6 +90,7 @@ const Shader::Profile profile{ .xfb_varyings = {}, }; +using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; @@ -151,6 +153,22 @@ void LinkProgram(GLuint program) { } } +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { + OGLAssemblyProgram program; + glGenProgramsARB(1, &program.handle); + glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, + static_cast(code.size()), code.data()); + if (!Settings::values.renderer_debug) { + return program; + } + const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + LOG_CRITICAL(Render_OpenGL, "{}", err); + LOG_INFO(Render_OpenGL, "{}", code); + } + return program; +} + GLenum Stage(size_t stage_index) { switch (stage_index) { case 0: @@ -294,13 +312,20 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - const std::vector code{EmitSPIRV(profile, program)}; - OGLProgram gl_program; - gl_program.handle = glCreateProgram(); - AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); - LinkProgram(gl_program.handle); + OGLAssemblyProgram asm_program; + OGLProgram source_program; + if (device.UseAssemblyShaders()) { + const std::string code{EmitGLASM(profile, program)}; + asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + } else { + const std::vector code{EmitSPIRV(profile, program)}; + source_program.handle = glCreateProgram(); + AddShader(GL_COMPUTE_SHADER, source_program.handle, code); + LinkProgram(source_program.handle); + } return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, - program_manager, std::move(gl_program), program.info); + program_manager, program.info, + std::move(source_program), std::move(asm_program)); } } // namespace OpenGL -- cgit v1.2.3 From dc02cb92e43d2ef05197e4edb2573116d7ae58c1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 8 May 2021 16:36:51 -0300 Subject: gl_rasterizer: Flush L2 caches before glFlush on GLASM --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e527b76ba..4834d58f0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -473,6 +473,14 @@ void RasterizerOpenGL::FlushCommands() { return; } num_queued_commands = 0; + + // Make sure memory stored from the previous GL command stream is visible + // This is only needed on assembly shaders where we write to GPU memory with raw pointers + // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used + // and prefer using NV_shader_storage_buffer_object when possible + if (Settings::values.use_assembly_shaders.GetValue()) { + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + } glFlush(); } -- cgit v1.2.3 From 01e18581b9bdba021e6a4ae8fa0f0987ceea3e49 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 9 May 2021 03:09:55 -0300 Subject: vk_pipeline_cache: Enable int8 and int16 types on Vulkan --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a5edcd072..7830c0194 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -128,6 +128,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = true, + .support_int8 = true, + .support_int16 = true, .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == -- cgit v1.2.3 From 568d813eeadfc7888bad72d4f5d695c9d745cfe5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 May 2021 00:43:19 -0300 Subject: vk_update_descriptor: Properly initialize payload on the update descriptor queue --- src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index ce3427c9b..0df3a7fe9 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -15,7 +15,9 @@ namespace Vulkan { VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) - : device{device_}, scheduler{scheduler_} {} + : device{device_}, scheduler{scheduler_} { + payload_cursor = payload.data(); +} VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; -- cgit v1.2.3 From 258f2dec1bc6f1f9d966579c1efb96f76d947060 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 May 2021 18:19:08 -0300 Subject: opengl: Initial (broken) support to GLASM shaders --- .../renderer_opengl/gl_graphics_program.cpp | 15 ++++++- .../renderer_opengl/gl_graphics_program.h | 6 ++- src/video_core/renderer_opengl/gl_shader_cache.cpp | 46 ++++++++++++++++------ 3 files changed, 53 insertions(+), 14 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index fd0958719..7c0bf7bc8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -33,10 +33,12 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, + std::array assembly_programs_, const std::array& infos) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, - state_tracker{state_tracker_}, program{std::move(program_)} { + state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( + assembly_programs_)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); @@ -290,7 +292,16 @@ void GraphicsProgram::Configure(bool is_indexed) { texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindProgram(program.handle); + if (assembly_programs[0].handle != 0) { + // TODO: State track this + glEnable(GL_VERTEX_PROGRAM_NV); + glEnable(GL_FRAGMENT_PROGRAM_NV); + glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); + glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(program.handle); + } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 5adf3f41e..58aa4b0bc 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -73,7 +73,9 @@ public: Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, const std::array& infos); + OGLProgram program_, + std::array assembly_programs_, + const std::array& infos); void Configure(bool is_indexed); @@ -86,6 +88,8 @@ private: StateTracker& state_tracker; OGLProgram program; + std::array assembly_programs; + std::array stage_infos{}; std::array base_uniform_bindings{}; std::array base_storage_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d9f0bca78..c10ea2f60 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -185,6 +185,23 @@ GLenum Stage(size_t stage_index) { UNREACHABLE_MSG("{}", stage_index); return GL_NONE; } + +GLenum AssemblyStage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_PROGRAM_NV; + case 1: + return GL_TESS_CONTROL_PROGRAM_NV; + case 2: + return GL_TESS_EVALUATION_PROGRAM_NV; + case 3: + return GL_GEOMETRY_PROGRAM_NV; + case 4: + return GL_FRAGMENT_PROGRAM_NV; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -269,10 +286,12 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( } std::array infos{}; - OGLProgram gl_program; - gl_program.handle = glCreateProgram(); - + OGLProgram source_program; + std::array assembly_programs; Shader::Backend::Bindings binding; + if (!device.UseAssemblyShaders()) { + source_program.handle = glCreateProgram(); + } for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -282,15 +301,20 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( Shader::IR::Program& program{programs[index]}; const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - - const std::vector code{EmitSPIRV(profile, program, binding)}; - AddShader(Stage(stage_index), gl_program.handle, code); + if (device.UseAssemblyShaders()) { + const std::string code{EmitGLASM(profile, program)}; + assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); + } else { + const std::vector code{EmitSPIRV(profile, program, binding)}; + AddShader(Stage(stage_index), source_program.handle, code); + } } - LinkProgram(gl_program.handle); - - return std::make_unique(texture_cache, buffer_cache, gpu_memory, maxwell3d, - program_manager, state_tracker, std::move(gl_program), - infos); + if (!device.UseAssemblyShaders()) { + LinkProgram(source_program.handle); + } + return std::make_unique( + texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + std::move(source_program), std::move(assembly_programs), infos); } std::unique_ptr ShaderCache::CreateComputeProgram( -- cgit v1.2.3 From 8b7d5912d61d56f65fb7e3a03bba544a4c40bfa6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:04:09 -0300 Subject: glasm: Support textures used in more than one stage --- src/shader_recompiler/backend/glasm/emit_context.cpp | 10 +++++++++- src/shader_recompiler/backend/glasm/emit_context.h | 14 +++++++++++++- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 4 ++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 4 files changed, 25 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 4903e9d8e..d1fe84a5f 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -4,6 +4,7 @@ #include +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" @@ -22,7 +23,7 @@ std::string_view InterpDecorator(Interpolation interp) { } } // Anonymous namespace -EmitContext::EmitContext(IR::Program& program) { +EmitContext::EmitContext(IR::Program& program, Bindings& bindings) : info{program.info} { // FIXME: Temporary partial implementation u32 cbuf_index{}; for (const auto& desc : program.info.constant_buffer_descriptors) { @@ -79,6 +80,13 @@ EmitContext::EmitContext(IR::Program& program) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } + const size_t num_textures{program.info.texture_descriptors.size()}; + texture_bindings.resize(num_textures); + for (size_t index = 0; index < num_textures; ++index) { + const auto& desc{program.info.texture_descriptors[index]}; + texture_bindings[index] = bindings.texture; + bindings.texture += desc.count; + } } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index 4efe42ada..084635c77 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -6,11 +6,20 @@ #include #include +#include #include #include "shader_recompiler/backend/glasm/reg_alloc.h" +namespace Shader { +struct Info; +} + +namespace Shader::Backend { +struct Bindings; +} + namespace Shader::IR { class Inst; struct Program; @@ -20,7 +29,7 @@ namespace Shader::Backend::GLASM { class EmitContext { public: - explicit EmitContext(IR::Program& program); + explicit EmitContext(IR::Program& program, Bindings& bindings); template void Add(const char* format_str, IR::Inst& inst, Args&&... args) { @@ -45,6 +54,9 @@ public: std::string code; RegAlloc reg_alloc{*this}; + const Info& info; + + std::vector texture_bindings; std::string_view stage_name = "invalid"; }; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index a893fa3fb..edf6f5e13 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -312,8 +312,8 @@ std::string_view StageHeader(Stage stage) { } } // Anonymous namespace -std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { - EmitContext ctx{program}; +std::string EmitGLASM(const Profile&, IR::Program& program, Bindings& bindings) { + EmitContext ctx{program, bindings}; Precolor(ctx, program); EmitCode(ctx, program); std::string header{StageHeader(program.stage)}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c10ea2f60..b84b36b9d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -302,7 +302,7 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program)}; + const std::string code{EmitGLASM(profile, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const std::vector code{EmitSPIRV(profile, program, binding)}; -- cgit v1.2.3 From 85fc7e584ef9d64bae3269e7993bbf919bd10640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:07:18 -0300 Subject: HACK: Bind stages before and after bindings Works around a bug where program parameters are only applied to the current stage, and this one wasn't bound at the moment. Affects all SSBO usages on GLASM. --- src/video_core/renderer_opengl/gl_graphics_program.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 7c0bf7bc8..4ac026502 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -240,6 +240,17 @@ void GraphicsProgram::Configure(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); + // FIXME: Unhack this + if (assembly_programs[0].handle != 0) { + // TODO: State track this + glEnable(GL_VERTEX_PROGRAM_NV); + glEnable(GL_FRAGMENT_PROGRAM_NV); + glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); + glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(program.handle); + } const ImageId* views_it{image_view_ids.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; -- cgit v1.2.3 From c5ca4fe451c398542f4f6c5e468e0bb96866175d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 16:53:51 -0300 Subject: renderer_opengl: State track assembly programs --- .../renderer_opengl/gl_graphics_program.cpp | 27 +++--------- .../renderer_opengl/gl_graphics_program.h | 1 + src/video_core/renderer_opengl/gl_shader_manager.h | 51 ++++++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 4ac026502..b5d75aa13 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -42,6 +42,9 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + for (size_t stage = 0; stage < 5; ++stage) { + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + } u32 num_textures{}; u32 num_images{}; for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { @@ -182,6 +185,9 @@ void GraphicsProgram::Configure(bool is_indexed) { const std::span indices_span(image_view_indices.data(), image_view_index); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + ImageId* texture_buffer_index{image_view_ids.data()}; const auto bind_stage_info{[&](size_t stage) { size_t index{}; @@ -240,14 +246,8 @@ void GraphicsProgram::Configure(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); - // FIXME: Unhack this if (assembly_programs[0].handle != 0) { - // TODO: State track this - glEnable(GL_VERTEX_PROGRAM_NV); - glEnable(GL_FRAGMENT_PROGRAM_NV); - glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); - glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); - program_manager.BindProgram(0); + program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { program_manager.BindProgram(program.handle); } @@ -300,19 +300,6 @@ void GraphicsProgram::Configure(bool is_indexed) { if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); } - texture_cache.UpdateRenderTargets(false); - - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - if (assembly_programs[0].handle != 0) { - // TODO: State track this - glEnable(GL_VERTEX_PROGRAM_NV); - glEnable(GL_FRAGMENT_PROGRAM_NV); - glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); - glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); - program_manager.BindProgram(0); - } else { - program_manager.BindProgram(program.handle); - } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 58aa4b0bc..18292bb16 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -89,6 +89,7 @@ private: OGLProgram program; std::array assembly_programs; + u32 enabled_stages_mask{}; std::array stage_infos{}; std::array base_uniform_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 70781d6f5..48669b3cd 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,24 +4,69 @@ #pragma once +#include +#include + #include +#include "video_core/renderer_opengl/gl_resource_manager.h" + +#pragma optimize("", off) + namespace OpenGL { class ProgramManager { + static constexpr size_t NUM_STAGES = 5; + + static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, + }; + public: void BindProgram(GLuint program) { - if (bound_program == program) { + if (current_source_program == program) { return; } - bound_program = program; + current_source_program = program; glUseProgram(program); } + void BindAssemblyPrograms(std::span programs, + u32 stage_mask) { + const u32 changed_mask = current_assembly_mask ^ stage_mask; + current_assembly_mask = stage_mask; + + if (changed_mask != 0) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (((changed_mask >> stage) & 1) != 0) { + if (((stage_mask >> stage) & 1) != 0) { + glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } else { + glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } + } + } + } + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_assembly_programs[stage] != programs[stage].handle) { + current_assembly_programs[stage] = programs[stage].handle; + glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); + } + } + if (current_source_program != 0) { + current_source_program = 0; + glUseProgram(0); + } + } + void RestoreGuestCompute() {} private: - GLuint bound_program = 0; + GLuint current_source_program = 0; + + u32 current_assembly_mask = 0; + std::array current_assembly_programs; }; } // namespace OpenGL -- cgit v1.2.3 From 690b1841e6a1437335c0aae6d934f3fdcdb1680c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 16:59:05 -0300 Subject: renderer_opengl: State track compute assembly programs --- src/video_core/renderer_opengl/gl_compute_program.cpp | 5 +---- src/video_core/renderer_opengl/gl_shader_manager.h | 19 +++++++++++++++++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 1 + 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp index fb54618a4..ce52a0052 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -125,10 +125,7 @@ void ComputeProgram::Configure() { texture_cache.FillComputeImageViews(indices_span, image_view_ids); if (assembly_program.handle != 0) { - // FIXME: State track this - glEnable(GL_COMPUTE_PROGRAM_NV); - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, assembly_program.handle); - program_manager.BindProgram(0); + program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { program_manager.BindProgram(source_program.handle); } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 48669b3cd..df7e1f644 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -10,6 +10,7 @@ #include #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_device.h" #pragma optimize("", off) @@ -24,6 +25,12 @@ class ProgramManager { }; public: + explicit ProgramManager(const Device& device) { + if (device.UseAssemblyShaders()) { + glEnable(GL_COMPUTE_PROGRAM_NV); + } + } + void BindProgram(GLuint program) { if (current_source_program == program) { return; @@ -32,6 +39,17 @@ public: glUseProgram(program); } + void BindComputeAssemblyProgram(GLuint program) { + if (current_compute_assembly_program != program) { + current_compute_assembly_program = program; + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); + } + if (current_source_program != 0) { + current_source_program = 0; + glUseProgram(0); + } + } + void BindAssemblyPrograms(std::span programs, u32 stage_mask) { const u32 changed_mask = current_assembly_mask ^ stage_mask; @@ -67,6 +85,7 @@ private: u32 current_assembly_mask = 0; std::array current_assembly_programs; + GLuint current_compute_assembly_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 4e77ef808..a4805f3da 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -130,6 +130,7 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, std::unique_ptr context_) : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, + program_manager{device}, rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); -- cgit v1.2.3 From c0e4074721825e2af7be4f1a70408f5edb06597d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:00:55 -0300 Subject: gl_shader_manager: Remove unintentionally committed #pragma --- src/video_core/renderer_opengl/gl_shader_manager.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index df7e1f644..c922bcf82 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -12,8 +12,6 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_device.h" -#pragma optimize("", off) - namespace OpenGL { class ProgramManager { -- cgit v1.2.3 From 54decced922aaa73f4c30d696679f3602c930204 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:01:41 -0300 Subject: gl_shader_manager: Zero initialize current assembly programs --- src/video_core/renderer_opengl/gl_shader_manager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index c922bcf82..5ec57d707 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -82,7 +82,7 @@ private: GLuint current_source_program = 0; u32 current_assembly_mask = 0; - std::array current_assembly_programs; + std::array current_assembly_programs{}; GLuint current_compute_assembly_program = 0; }; -- cgit v1.2.3 From 9e7b6622c25aa858b96bf0f1c7f94223a2f449a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 02:12:32 -0300 Subject: shader: Split profile and runtime information in separate structs --- .../backend/glasm/emit_context.cpp | 40 +- src/shader_recompiler/backend/glasm/emit_context.h | 6 +- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 19 +- src/shader_recompiler/backend/glasm/emit_glasm.h | 6 +- .../backend/spirv/emit_context.cpp | 26 +- src/shader_recompiler/backend/spirv/emit_context.h | 4 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 20 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 6 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 4 +- .../backend/spirv/emit_spirv_special.cpp | 15 +- src/shader_recompiler/profile.h | 13 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 26 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 418 ++++++++++----------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 5 +- 14 files changed, 300 insertions(+), 308 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index e42f186c1..659ff6d17 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -23,23 +23,25 @@ std::string_view InterpDecorator(Interpolation interp) { } } // Anonymous namespace -EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_) - : info{program.info}, profile{profile_} { +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_) + : profile{profile_}, runtime_info{runtime_info_} { // FIXME: Temporary partial implementation + const auto& info{program.info}; u32 cbuf_index{}; - for (const auto& desc : program.info.constant_buffer_descriptors) { + for (const auto& desc : info.constant_buffer_descriptors) { if (desc.count != 1) { throw NotImplementedException("Constant buffer descriptor array"); } Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); ++cbuf_index; } - for (const auto& desc : program.info.storage_buffers_descriptors) { + for (const auto& desc : info.storage_buffers_descriptors) { if (desc.count != 1) { throw NotImplementedException("Storage buffer descriptor array"); } } - if (const size_t num = program.info.storage_buffers_descriptors.size(); num > 0) { + if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); } stage = program.stage; @@ -67,8 +69,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { - const auto& generic{program.info.input_generics[index]}; + for (size_t index = 0; index < info.input_generics.size(); ++index) { + const auto& generic{info.input_generics[index]}; if (generic.used) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", InterpDecorator(generic.interpolation), index, attr_stage, index, index); @@ -101,8 +103,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile index, index); } } - for (size_t index = 0; index < program.info.stores_frag_color.size(); ++index) { - if (!program.info.stores_frag_color[index]) { + for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { + if (!info.stores_frag_color[index]) { continue; } if (index == 0) { @@ -111,28 +113,28 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } - for (size_t index = 0; index < program.info.stores_generics.size(); ++index) { - if (program.info.stores_generics[index]) { + for (size_t index = 0; index < info.stores_generics.size(); ++index) { + if (info.stores_generics[index]) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } - image_buffer_bindings.reserve(program.info.image_buffer_descriptors.size()); - for (const auto& desc : program.info.image_buffer_descriptors) { + image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); + for (const auto& desc : info.image_buffer_descriptors) { image_buffer_bindings.push_back(bindings.image); bindings.image += desc.count; } - image_bindings.reserve(program.info.image_descriptors.size()); - for (const auto& desc : program.info.image_descriptors) { + image_bindings.reserve(info.image_descriptors.size()); + for (const auto& desc : info.image_descriptors) { image_bindings.push_back(bindings.image); bindings.image += desc.count; } - texture_buffer_bindings.reserve(program.info.texture_buffer_descriptors.size()); - for (const auto& desc : program.info.texture_buffer_descriptors) { + texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); + for (const auto& desc : info.texture_buffer_descriptors) { texture_buffer_bindings.push_back(bindings.texture); bindings.texture += desc.count; } - texture_bindings.reserve(program.info.texture_descriptors.size()); - for (const auto& desc : program.info.texture_descriptors) { + texture_bindings.reserve(info.texture_descriptors.size()); + for (const auto& desc : info.texture_descriptors) { texture_bindings.push_back(bindings.texture); bindings.texture += desc.count; } diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index e76ed1d7c..1f057fdd5 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -16,6 +16,7 @@ namespace Shader { struct Info; struct Profile; +struct RuntimeInfo; } // namespace Shader namespace Shader::Backend { @@ -31,7 +32,8 @@ namespace Shader::Backend::GLASM { class EmitContext { public: - explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_); + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_); template void Add(const char* format_str, IR::Inst& inst, Args&&... args) { @@ -56,8 +58,8 @@ public: std::string code; RegAlloc reg_alloc{*this}; - const Info& info; const Profile& profile; + const RuntimeInfo& runtime_info; std::vector texture_buffer_bindings; std::vector image_buffer_bindings; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index f110fd7f8..edff04a44 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -374,8 +374,9 @@ std::string_view GetTessSpacing(TessSpacing spacing) { } } // Anonymous namespace -std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bindings) { - EmitContext ctx{program, bindings, profile}; +std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, + Bindings& bindings) { + EmitContext ctx{program, bindings, profile, runtime_info}; Precolor(ctx, program); EmitCode(ctx, program); std::string header{StageHeader(program.stage)}; @@ -385,18 +386,18 @@ std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bi header += fmt::format("VERTICES_OUT {};", program.invocations); break; case Stage::TessellationEval: - header += - fmt::format("TESS_MODE {};" - "TESS_SPACING {};" - "TESS_VERTEX_ORDER {};", - GetTessMode(profile.tess_primitive), GetTessSpacing(profile.tess_spacing), - profile.tess_clockwise ? "CW" : "CCW"); + header += fmt::format("TESS_MODE {};" + "TESS_SPACING {};" + "TESS_VERTEX_ORDER {};", + GetTessMode(runtime_info.tess_primitive), + GetTessSpacing(runtime_info.tess_spacing), + runtime_info.tess_clockwise ? "CW" : "CCW"); break; case Stage::Geometry: header += fmt::format("PRIMITIVE_IN {};" "PRIMITIVE_OUT {};" "VERTICES_OUT {};", - InputPrimitive(profile.input_topology), + InputPrimitive(runtime_info.input_topology), OutputPrimitive(program.output_topology), program.output_vertices); break; case Stage::Compute: diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index a0dfdd818..3d02d873e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -12,12 +12,12 @@ namespace Shader::Backend::GLASM { -[[nodiscard]] std::string EmitGLASM(const Profile& profile, IR::Program& program, - Bindings& binding); +[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); [[nodiscard]] inline std::string EmitGLASM(const Profile& profile, IR::Program& program) { Bindings binding; - return EmitGLASM(profile, program, binding); + return EmitGLASM(profile, {}, program, binding); } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index a98e08392..3e8899f53 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -136,7 +136,7 @@ Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, break; case Stage::Geometry: if (per_invocation) { - const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; + const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)}; type = ctx.TypeArray(type, ctx.Const(num_vertices)); } break; @@ -161,8 +161,8 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional invo while (element < 4) { const u32 remainder{4 - element}; const TransformFeedbackVarying* xfb_varying{}; - if (!ctx.profile.xfb_varyings.empty()) { - xfb_varying = &ctx.profile.xfb_varyings[base_attr_index + element]; + if (!ctx.runtime_info.xfb_varyings.empty()) { + xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element]; xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; } const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; @@ -208,7 +208,7 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) { } std::optional AttrTypes(EmitContext& ctx, u32 index) { - const AttributeType type{ctx.profile.generic_input_types.at(index)}; + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; switch (type) { case AttributeType::Float: return AttrInfo{ctx.input_f32, ctx.F32[1], false}; @@ -441,13 +441,15 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& binding) - : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { +EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, + IR::Program& program, Bindings& bindings) + : Sirit::Module(profile_.supported_spirv), profile{profile_}, + runtime_info{runtime_info_}, stage{program.stage} { const bool is_unified{profile.unified_descriptor_binding}; - u32& uniform_binding{is_unified ? binding.unified : binding.uniform_buffer}; - u32& storage_binding{is_unified ? binding.unified : binding.storage_buffer}; - u32& texture_binding{is_unified ? binding.unified : binding.texture}; - u32& image_binding{is_unified ? binding.unified : binding.image}; + u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; + u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; + u32& texture_binding{is_unified ? bindings.unified : bindings.texture}; + u32& image_binding{is_unified ? bindings.unified : bindings.image}; AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); @@ -1211,7 +1213,7 @@ void EmitContext::DefineInputs(const Info& info) { if (!generic.used) { continue; } - const AttributeType input_type{profile.generic_input_types[index]}; + const AttributeType input_type{runtime_info.generic_input_types[index]}; if (input_type == AttributeType::Disabled) { continue; } @@ -1256,7 +1258,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { if (info.stores_position || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } - if (info.stores_point_size || profile.fixed_state_point_size) { + if (info.stores_point_size || runtime_info.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index d2b79f6c1..961c9180c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -103,7 +103,8 @@ struct GenericElementInfo { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, IR::Program& program, Bindings& binding); + explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& binding); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); @@ -150,6 +151,7 @@ public: } const Profile& profile; + const RuntimeInfo& runtime_info; Stage stage{}; Id void_id{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3e20ac3b9..cba420cda 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -226,16 +226,17 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { case Stage::TessellationEval: execution_model = spv::ExecutionModel::TessellationEvaluation; ctx.AddCapability(spv::Capability::Tessellation); - ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_primitive)); - ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_spacing)); - ctx.AddExecutionMode(main, ctx.profile.tess_clockwise ? spv::ExecutionMode::VertexOrderCw - : spv::ExecutionMode::VertexOrderCcw); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive)); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing)); + ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise + ? spv::ExecutionMode::VertexOrderCw + : spv::ExecutionMode::VertexOrderCcw); break; case Stage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddCapability(spv::Capability::Geometry); ctx.AddCapability(spv::Capability::GeometryStreams); - switch (ctx.profile.input_topology) { + switch (ctx.runtime_info.input_topology) { case InputTopology::Points: ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); break; @@ -279,7 +280,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { if (program.info.stores_frag_depth) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } - if (ctx.profile.force_early_z) { + if (ctx.runtime_info.force_early_z) { ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); } break; @@ -402,7 +403,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_sample_id) { ctx.AddCapability(spv::Capability::SampleRateShading); } - if (!ctx.profile.xfb_varyings.empty()) { + if (!ctx.runtime_info.xfb_varyings.empty()) { ctx.AddCapability(spv::Capability::TransformFeedback); } if (info.uses_derivatives) { @@ -433,8 +434,9 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& binding) { - EmitContext ctx{profile, program, binding}; +std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings) { + EmitContext ctx{profile, runtime_info, program, bindings}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); if (profile.support_float_controls) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index d8ab2d8ed..db0c935fe 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -16,12 +16,12 @@ namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, IR::Program& program, - Bindings& binding); +[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); [[nodiscard]] inline std::vector EmitSPIRV(const Profile& profile, IR::Program& program) { Bindings binding; - return EmitSPIRV(profile, program, binding); + return EmitSPIRV(profile, {}, program, binding); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 8e57ff070..c1b69c234 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -17,7 +17,7 @@ struct AttrInfo { }; std::optional AttrTypes(EmitContext& ctx, u32 index) { - const AttributeType type{ctx.profile.generic_input_types.at(index)}; + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; switch (type) { case AttributeType::Float: return AttrInfo{ctx.input_f32, ctx.F32[1], false}; @@ -468,7 +468,7 @@ Id EmitIsHelperInvocation(EmitContext& ctx) { } Id EmitYDirection(EmitContext& ctx) { - return ctx.Const(ctx.profile.y_negate ? -1.0f : 1.0f); + return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f); } Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index ba948f3c9..072a3b1bd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -18,8 +18,8 @@ void ConvertDepthMode(EmitContext& ctx) { } void SetFixedPipelinePointSize(EmitContext& ctx) { - if (ctx.profile.fixed_state_point_size) { - const float point_size{*ctx.profile.fixed_state_point_size}; + if (ctx.runtime_info.fixed_state_point_size) { + const float point_size{*ctx.runtime_info.fixed_state_point_size}; ctx.OpStore(ctx.output_point_size, ctx.Const(point_size)); } } @@ -62,7 +62,10 @@ Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1 } void AlphaTest(EmitContext& ctx) { - const auto comparison{*ctx.profile.alpha_test_func}; + if (!ctx.runtime_info.alpha_test_func) { + return; + } + const auto comparison{*ctx.runtime_info.alpha_test_func}; if (comparison == CompareFunction::Always) { return; } @@ -76,7 +79,7 @@ void AlphaTest(EmitContext& ctx) { const Id true_label{ctx.OpLabel()}; const Id discard_label{ctx.OpLabel()}; - const Id alpha_reference{ctx.Const(ctx.profile.alpha_test_reference)}; + const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)}; const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone); @@ -113,7 +116,7 @@ void EmitPrologue(EmitContext& ctx) { } void EmitEpilogue(EmitContext& ctx) { - if (ctx.stage == Stage::VertexB && ctx.profile.convert_depth_mode) { + if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) { ConvertDepthMode(ctx); } if (ctx.stage == Stage::Fragment) { @@ -122,7 +125,7 @@ void EmitEpilogue(EmitContext& ctx) { } void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { - if (ctx.profile.convert_depth_mode) { + if (ctx.runtime_info.convert_depth_mode) { ConvertDepthMode(ctx); } if (stream.IsImmediate()) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 12699511a..c46452c3d 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -81,19 +81,22 @@ struct Profile { bool support_viewport_mask{}; bool support_typeless_image_loads{}; bool support_demote_to_helper_invocation{}; - bool warp_size_potentially_larger_than_guest{}; bool support_int64_atomics{}; + + bool warp_size_potentially_larger_than_guest{}; bool lower_left_origin_mode{}; - // FClamp is broken and OpFMax + OpFMin should be used instead + /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; - // Offset image operands with an unsigned type do not work + /// Offset image operands with an unsigned type do not work bool has_broken_unsigned_image_offsets{}; - // Signed instructions with unsigned data types are misinterpreted + /// Signed instructions with unsigned data types are misinterpreted bool has_broken_signed_operations{}; - // Ignores SPIR-V ordered vs unordered using GLSL semantics + /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; +}; +struct RuntimeInfo { std::array generic_input_types{}; bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b84b36b9d..d7efbdd01 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -61,33 +61,15 @@ const Shader::Profile profile{ .support_viewport_mask = true, .support_typeless_image_loads = true, .support_demote_to_helper_invocation = false, - .warp_size_potentially_larger_than_guest = true, .support_int64_atomics = false, + + .warp_size_potentially_larger_than_guest = true, .lower_left_origin_mode = true, .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, .ignore_nan_fp_comparisons = true, - - .generic_input_types = {}, - .convert_depth_mode = false, - .force_early_z = false, - - .tess_primitive = {}, - .tess_spacing = {}, - .tess_clockwise = false, - - .input_topology = Shader::InputTopology::Triangles, - - .fixed_state_point_size = std::nullopt, - - .alpha_test_func = Shader::CompareFunction::Always, - .alpha_test_reference = 0.0f, - - .y_negate = false, - - .xfb_varyings = {}, }; using Shader::Backend::GLASM::EmitGLASM; @@ -302,10 +284,10 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program, binding)}; + const std::string code{EmitGLASM(profile, {}, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector code{EmitSPIRV(profile, program, binding)}; + const std::vector code{EmitSPIRV(profile, {}, program, binding)}; AddShader(Stage(stage_index), source_program.handle, code); } } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7830c0194..88db10b75 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -89,6 +89,208 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); return {}; } + +static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + if (attr.enabled == 0) { + return Shader::AttributeType::Disabled; + } + switch (attr.Type()) { + case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: + return Shader::AttributeType::Float; + case Maxwell::VertexAttribute::Type::SignedInt: + return Shader::AttributeType::SignedInt; + case Maxwell::VertexAttribute::Type::UnsignedInt: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Float; +} + +std::vector MakeTransformFeedbackVaryings( + const GraphicsPipelineCacheKey& key) { + static constexpr std::array VECTORS{ + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] + }; + std::vector xfb(256); + for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { + const auto& locations = key.state.xfb_state.varyings[buffer]; + const auto& layout = key.state.xfb_state.layouts[buffer]; + const u32 varying_count = layout.varying_count; + u32 highest = 0; + for (u32 offset = 0; offset < varying_count; ++offset) { + const u32 base_offset = offset; + const u8 location = locations[offset]; + + Shader::TransformFeedbackVarying varying; + varying.buffer = layout.stream; + varying.stride = layout.stride; + varying.offset = offset * 4; + varying.components = 1; + + if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + xfb[location] = varying; + highest = std::max(highest, (base_offset + varying.components) * 4); + } + UNIMPLEMENTED_IF(highest != layout.stride); + } + return xfb; +} + +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, + const Shader::IR::Program& program) { + Shader::RuntimeInfo info; + + const Shader::Stage stage{program.stage}; + const bool has_geometry{key.unique_hashes[4] != 0}; + const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; + const float point_size{Common::BitCast(key.state.point_size)}; + switch (stage) { + case Shader::Stage::VertexB: + if (!has_geometry) { + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = MakeTransformFeedbackVaryings(key); + } + info.convert_depth_mode = gl_ndc; + } + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + break; + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... + info.tess_clockwise = key.state.tessellation_clockwise == 0; + info.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Geometry: + if (program.output_topology == Shader::OutputTopology::PointList) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = MakeTransformFeedbackVaryings(key); + } + info.convert_depth_mode = gl_ndc; + break; + case Shader::Stage::Fragment: + info.alpha_test_func = MaxwellToCompareFunction( + key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); + info.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); + break; + default: + break; + } + switch (key.state.topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; + } + info.force_early_z = key.state.early_z != 0; + info.y_negate = key.state.y_negate != 0; + return info; +} } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -124,7 +326,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; - base_profile = Shader::Profile{ + profile = Shader::Profile{ .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = true, @@ -153,14 +355,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, - .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, - .generic_input_types{}, - .fixed_state_point_size{}, - .alpha_test_func{}, - .xfb_varyings{}, }; } @@ -329,8 +527,8 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::Profile profile{MakeProfile(key, program)}; - const std::vector code{EmitSPIRV(profile, program, binding)}; + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { @@ -391,7 +589,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - const std::vector code{EmitSPIRV(base_profile, program)}; + const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { @@ -403,206 +601,4 @@ std::unique_ptr PipelineCache::CreateComputePipeline( thread_worker, program.info, std::move(spv_module)); } -static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { - if (attr.enabled == 0) { - return Shader::AttributeType::Disabled; - } - switch (attr.Type()) { - case Maxwell::VertexAttribute::Type::SignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - case Maxwell::VertexAttribute::Type::Float: - return Shader::AttributeType::Float; - case Maxwell::VertexAttribute::Type::SignedInt: - return Shader::AttributeType::SignedInt; - case Maxwell::VertexAttribute::Type::UnsignedInt: - return Shader::AttributeType::UnsignedInt; - } - return Shader::AttributeType::Float; -} - -static std::vector MakeTransformFeedbackVaryings( - const GraphicsPipelineCacheKey& key) { - static constexpr std::array VECTORS{ - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] - }; - std::vector xfb(256); - for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = key.state.xfb_state.varyings[buffer]; - const auto& layout = key.state.xfb_state.layouts[buffer]; - const u32 varying_count = layout.varying_count; - u32 highest = 0; - for (u32 offset = 0; offset < varying_count; ++offset) { - const u32 base_offset = offset; - const u8 location = locations[offset]; - - Shader::TransformFeedbackVarying varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * 4; - varying.components = 1; - - if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - xfb[location] = varying; - highest = std::max(highest, (base_offset + varying.components) * 4); - } - UNIMPLEMENTED_IF(highest != layout.stride); - } - return xfb; -} - -Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program) { - Shader::Profile profile{base_profile}; - - const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; - const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; - const float point_size{Common::BitCast(key.state.point_size)}; - switch (stage) { - case Shader::Stage::VertexB: - if (!has_geometry) { - if (key.state.topology == Maxwell::PrimitiveTopology::Points) { - profile.fixed_state_point_size = point_size; - } - if (key.state.xfb_enabled != 0) { - profile.xfb_varyings = MakeTransformFeedbackVaryings(key); - } - profile.convert_depth_mode = gl_ndc; - } - std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), - &CastAttributeType); - break; - case Shader::Stage::TessellationEval: - // We have to flip tessellation clockwise for some reason... - profile.tess_clockwise = key.state.tessellation_clockwise == 0; - profile.tess_primitive = [&key] { - const u32 raw{key.state.tessellation_primitive.Value()}; - switch (static_cast(raw)) { - case Maxwell::TessellationPrimitive::Isolines: - return Shader::TessPrimitive::Isolines; - case Maxwell::TessellationPrimitive::Triangles: - return Shader::TessPrimitive::Triangles; - case Maxwell::TessellationPrimitive::Quads: - return Shader::TessPrimitive::Quads; - } - UNREACHABLE(); - return Shader::TessPrimitive::Triangles; - }(); - profile.tess_spacing = [&] { - const u32 raw{key.state.tessellation_spacing}; - switch (static_cast(raw)) { - case Maxwell::TessellationSpacing::Equal: - return Shader::TessSpacing::Equal; - case Maxwell::TessellationSpacing::FractionalOdd: - return Shader::TessSpacing::FractionalOdd; - case Maxwell::TessellationSpacing::FractionalEven: - return Shader::TessSpacing::FractionalEven; - } - UNREACHABLE(); - return Shader::TessSpacing::Equal; - }(); - break; - case Shader::Stage::Geometry: - if (program.output_topology == Shader::OutputTopology::PointList) { - profile.fixed_state_point_size = point_size; - } - if (key.state.xfb_enabled != 0) { - profile.xfb_varyings = MakeTransformFeedbackVaryings(key); - } - profile.convert_depth_mode = gl_ndc; - break; - case Shader::Stage::Fragment: - profile.alpha_test_func = MaxwellToCompareFunction( - key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); - profile.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); - break; - default: - break; - } - switch (key.state.topology) { - case Maxwell::PrimitiveTopology::Points: - profile.input_topology = Shader::InputTopology::Points; - break; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - profile.input_topology = Shader::InputTopology::Lines; - break; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - case Maxwell::PrimitiveTopology::Quads: - case Maxwell::PrimitiveTopology::QuadStrip: - case Maxwell::PrimitiveTopology::Polygon: - case Maxwell::PrimitiveTopology::Patches: - profile.input_topology = Shader::InputTopology::Triangles; - break; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - profile.input_topology = Shader::InputTopology::LinesAdjacency; - break; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - profile.input_topology = Shader::InputTopology::TrianglesAdjacency; - break; - } - profile.force_early_z = key.state.early_z != 0; - profile.y_negate = key.state.y_negate != 0; - return profile; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 4e48b4956..4116cc73f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -129,9 +129,6 @@ private: Shader::Environment& env, bool build_in_parallel); - Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program); - const Device& device; VKScheduler& scheduler; DescriptorPool& descriptor_pool; @@ -148,7 +145,7 @@ private: ShaderPools main_pools; - Shader::Profile base_profile; + Shader::Profile profile; std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; -- cgit v1.2.3 From c07cc9d6a560d14e25ec59974ae5a15a7842d779 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 02:57:52 -0300 Subject: gl_shader_cache: Pass shader runtime information --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 76 +++++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d7efbdd01..b4f26dd74 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -184,6 +184,76 @@ GLenum AssemblyStage(size_t stage_index) { UNREACHABLE_MSG("{}", stage_index); return GL_NONE; } + +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, + const Shader::IR::Program& program) { + UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); + + Shader::RuntimeInfo info; + switch (program.stage) { + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... + info.tess_clockwise = key.tessellation_clockwise == 0; + info.tess_primitive = [&key] { + switch (key.tessellation_primitive) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + switch (key.tessellation_spacing) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Geometry: + + break; + default: + break; + } + switch (key.gs_input_topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; + } + return info; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -283,11 +353,13 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( Shader::IR::Program& program{programs[index]}; const size_t stage_index{index - 1}; infos[stage_index] = &program.info; + + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, {}, program, binding)}; + const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector code{EmitSPIRV(profile, {}, program, binding)}; + const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; AddShader(Stage(stage_index), source_program.handle, code); } } -- cgit v1.2.3 From 69b910e9e7c2b9c361f4389cb1d136105b991bc0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 17:19:35 -0300 Subject: video_core: Abstract transform feedback translation utility --- src/video_core/CMakeLists.txt | 2 + .../renderer_vulkan/fixed_pipeline_state.cpp | 25 +++--- .../renderer_vulkan/fixed_pipeline_state.h | 15 +--- .../renderer_vulkan/vk_pipeline_cache.cpp | 86 +------------------ src/video_core/transform_feedback.cpp | 98 ++++++++++++++++++++++ src/video_core/transform_feedback.h | 30 +++++++ 6 files changed, 145 insertions(+), 111 deletions(-) create mode 100644 src/video_core/transform_feedback.cpp create mode 100644 src/video_core/transform_feedback.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index b008c37c0..8250f736c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -187,6 +187,8 @@ add_library(video_core STATIC textures/decoders.h textures/texture.cpp textures/texture.h + transform_feedback.cpp + transform_feedback.h video_core.cpp video_core.h vulkan_common/vulkan_debug_callback.cpp diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 24834e0f7..3a43c329f 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -15,9 +15,7 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" namespace Vulkan { - namespace { - constexpr size_t POINT = 0; constexpr size_t LINE = 1; constexpr size_t POLYGON = 2; @@ -39,6 +37,16 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { POLYGON, // Patches }; +void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; +} } // Anonymous namespace void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, @@ -121,7 +129,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, dynamic_state.Refresh(regs); } if (xfb_enabled != 0) { - xfb_state.Refresh(regs); + RefreshXfbState(xfb_state, regs); } } @@ -164,17 +172,6 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t enable.Assign(1); } -void FixedPipelineState::TransformFeedbackState::Refresh(const Maxwell& regs) { - std::ranges::transform(regs.tfb_layouts, layouts.begin(), [](const auto& layout) { - return Layout{ - .stream = layout.stream, - .varying_count = layout.varying_count, - .stride = layout.stride, - }; - }); - varyings = regs.tfb_varying_locs; -} - void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 31de6b2c8..0f1eff9cd 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -12,6 +12,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" +#include "video_core/transform_feedback.h" namespace Vulkan { @@ -130,18 +131,6 @@ struct FixedPipelineState { } }; - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - - void Refresh(const Maxwell& regs); - }; - struct DynamicState { union { u32 raw1; @@ -213,7 +202,7 @@ struct FixedPipelineState { std::array attachments; std::array viewport_swizzles; DynamicState dynamic_state; - TransformFeedbackState xfb_state; + VideoCommon::TransformFeedbackState xfb_state; void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 88db10b75..f86bf9c30 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -109,88 +109,6 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } -std::vector MakeTransformFeedbackVaryings( - const GraphicsPipelineCacheKey& key) { - static constexpr std::array VECTORS{ - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] - }; - std::vector xfb(256); - for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = key.state.xfb_state.varyings[buffer]; - const auto& layout = key.state.xfb_state.layouts[buffer]; - const u32 varying_count = layout.varying_count; - u32 highest = 0; - for (u32 offset = 0; offset < varying_count; ++offset) { - const u32 base_offset = offset; - const u8 location = locations[offset]; - - Shader::TransformFeedbackVarying varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * 4; - varying.components = 1; - - if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - xfb[location] = varying; - highest = std::max(highest, (base_offset + varying.components) * 4); - } - UNIMPLEMENTED_IF(highest != layout.stride); - } - return xfb; -} - Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::RuntimeInfo info; @@ -206,7 +124,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled != 0) { - info.xfb_varyings = MakeTransformFeedbackVaryings(key); + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); } info.convert_depth_mode = gl_ndc; } @@ -248,7 +166,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, info.fixed_state_point_size = point_size; } if (key.state.xfb_enabled != 0) { - info.xfb_varyings = MakeTransformFeedbackVaryings(key); + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); } info.convert_depth_mode = gl_ndc; break; diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp new file mode 100644 index 000000000..db52fff93 --- /dev/null +++ b/src/video_core/transform_feedback.cpp @@ -0,0 +1,98 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include + +#include "common/alignment.h" +#include "common/assert.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/transform_feedback.h" + +namespace VideoCommon { + +std::vector MakeTransformFeedbackVaryings( + const TransformFeedbackState& state) { + static constexpr std::array VECTORS{ + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] + }; + std::vector xfb(256); + for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { + const auto& locations = state.varyings[buffer]; + const auto& layout = state.layouts[buffer]; + const u32 varying_count = layout.varying_count; + u32 highest = 0; + for (u32 offset = 0; offset < varying_count; ++offset) { + const u32 base_offset = offset; + const u8 location = locations[offset]; + + Shader::TransformFeedbackVarying varying{ + .buffer = layout.stream, + .stride = layout.stride, + .offset = offset * 4, + .components = 1, + }; + if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + xfb[location] = varying; + highest = std::max(highest, (base_offset + varying.components) * 4); + } + UNIMPLEMENTED_IF(highest != layout.stride); + } + return xfb; +} + +} // namespace VideoCommon diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h new file mode 100644 index 000000000..6832c6db1 --- /dev/null +++ b/src/video_core/transform_feedback.h @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/profile.h" +#include "video_core/engines/maxwell_3d.h" + +namespace VideoCommon { + +struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> + varyings; +}; + +std::vector MakeTransformFeedbackVaryings( + const TransformFeedbackState& state); + +} // namespace VideoCommon -- cgit v1.2.3 From 6bc54e12a0d274beee0cb7584f73429112ec98b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 18:17:53 -0300 Subject: glasm: Set transform feedback state --- .../renderer_opengl/gl_graphics_program.cpp | 90 +++++++++++++++++++- .../renderer_opengl/gl_graphics_program.h | 32 ++++--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 98 ++-------------------- src/video_core/renderer_opengl/gl_rasterizer.h | 6 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 19 ++++- 5 files changed, 132 insertions(+), 113 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index b5d75aa13..9677a3ed6 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -12,7 +12,7 @@ #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { - +namespace { using Shader::ImageBufferDescriptor; using Tegra::Texture::TexturePair; using VideoCommon::ImageId; @@ -20,6 +20,35 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; +/// Translates hardware transform feedback indices +/// @param location Hardware location +/// @return Pair of ARB_transform_feedback3 token stream first and third arguments +/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt +std::pair TransformFeedbackEnum(u8 location) { + const u8 index = location / 4; + if (index >= 8 && index <= 39) { + return {GL_GENERIC_ATTRIB_NV, index - 8}; + } + if (index >= 48 && index <= 55) { + return {GL_TEXTURE_COORD_NV, index - 48}; + } + switch (index) { + case 7: + return {GL_POSITION, 0}; + case 40: + return {GL_PRIMARY_COLOR_NV, 0}; + case 41: + return {GL_SECONDARY_COLOR_NV, 0}; + case 42: + return {GL_BACK_PRIMARY_COLOR_NV, 0}; + case 43: + return {GL_BACK_SECONDARY_COLOR_NV, 0}; + } + UNIMPLEMENTED_MSG("index={}", index); + return {GL_POSITION, 0}; +} +} // Anonymous namespace + size_t GraphicsProgramKey::Hash() const noexcept { return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); } @@ -34,7 +63,8 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, std::array assembly_programs_, - const std::array& infos) + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( @@ -74,6 +104,10 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); + + if (assembly_programs[0].handle != 0 && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } } struct Spec { @@ -302,4 +336,56 @@ void GraphicsProgram::Configure(bool is_indexed) { } } +void GraphicsProgram::GenerateTransformFeedbackState( + const VideoCommon::TransformFeedbackState& xfb_state) { + // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal + // when this is required. + const auto& regs{maxwell3d.regs}; + + GLint* cursor{xfb_attribs.data()}; + GLint* current_stream{xfb_streams.data()}; + + for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { + const auto& layout = regs.tfb_layouts[feedback]; + UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); + if (layout.varying_count == 0) { + continue; + } + *current_stream = static_cast(feedback); + if (current_stream != xfb_streams.data()) { + // When stepping one stream, push the expected token + cursor[0] = GL_NEXT_BUFFER_NV; + cursor[1] = 0; + cursor[2] = 0; + cursor += XFB_ENTRY_STRIDE; + } + ++current_stream; + + const auto& locations = regs.tfb_varying_locs[feedback]; + std::optional current_index; + for (u32 offset = 0; offset < layout.varying_count; ++offset) { + const u8 location = locations[offset]; + const u8 index = location / 4; + + if (current_index == index) { + // Increase number of components of the previous attachment + ++cursor[-2]; + continue; + } + current_index = index; + + std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); + cursor[1] = 1; + cursor += XFB_ENTRY_STRIDE; + } + } + num_xfb_attribs = static_cast((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); + num_xfb_strides = static_cast(current_stream - xfb_streams.data()); +} + +void GraphicsProgram::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 18292bb16..53a57ede5 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -16,6 +16,7 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/transform_feedback.h" namespace OpenGL { @@ -24,16 +25,6 @@ class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct GraphicsProgramKey { - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - }; - std::array unique_hashes; union { u32 raw; @@ -45,7 +36,7 @@ struct GraphicsProgramKey { BitField<10, 1, u32> tessellation_clockwise; }; std::array padding; - TransformFeedbackState xfb_state; + VideoCommon::TransformFeedbackState xfb_state; size_t Hash() const noexcept; @@ -75,11 +66,22 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, std::array assembly_programs_, - const std::array& infos); + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state); void Configure(bool is_indexed); + void ConfigureTransformFeedback() const { + if (num_xfb_attribs != 0) { + ConfigureTransformFeedbackImpl(); + } + } + private: + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + + void ConfigureTransformFeedbackImpl() const; + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -96,6 +98,12 @@ private: std::array base_storage_bindings{}; std::array num_texture_buffers{}; std::array num_image_buffers{}; + + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; + GLsizei num_xfb_attribs{}; + GLsizei num_xfb_strides{}; + std::array xfb_attribs{}; + std::array xfb_streams{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4834d58f0..51ff42ee9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -51,37 +51,8 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); namespace { - constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -/// Translates hardware transform feedback indices -/// @param location Hardware location -/// @return Pair of ARB_transform_feedback3 token stream first and third arguments -/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt -std::pair TransformFeedbackEnum(u8 location) { - const u8 index = location / 4; - if (index >= 8 && index <= 39) { - return {GL_GENERIC_ATTRIB_NV, index - 8}; - } - if (index >= 48 && index <= 55) { - return {GL_TEXTURE_COORD_NV, index - 48}; - } - switch (index) { - case 7: - return {GL_POSITION, 0}; - case 40: - return {GL_PRIMARY_COLOR_NV, 0}; - case 41: - return {GL_SECONDARY_COLOR_NV, 0}; - case 42: - return {GL_BACK_PRIMARY_COLOR_NV, 0}; - case 43: - return {GL_BACK_SECONDARY_COLOR_NV, 0}; - } - UNIMPLEMENTED_MSG("index={}", index); - return {GL_POSITION, 0}; -} - void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } @@ -253,7 +224,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { program->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - BeginTransformFeedback(primitive_mode); + BeginTransformFeedback(program, primitive_mode); const GLuint base_instance = static_cast(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = @@ -1025,68 +996,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } -void RasterizerOpenGL::SyncTransformFeedback() { - // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal - // when this is required. - const auto& regs = maxwell3d.regs; - - static constexpr std::size_t STRIDE = 3; - std::array attribs; - std::array streams; - - GLint* cursor = attribs.data(); - GLint* current_stream = streams.data(); - - for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; - UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); - if (layout.varying_count == 0) { - continue; - } - - *current_stream = static_cast(feedback); - if (current_stream != streams.data()) { - // When stepping one stream, push the expected token - cursor[0] = GL_NEXT_BUFFER_NV; - cursor[1] = 0; - cursor[2] = 0; - cursor += STRIDE; - } - ++current_stream; - - const auto& locations = regs.tfb_varying_locs[feedback]; - std::optional current_index; - for (u32 offset = 0; offset < layout.varying_count; ++offset) { - const u8 location = locations[offset]; - const u8 index = location / 4; - - if (current_index == index) { - // Increase number of components of the previous attachment - ++cursor[-2]; - continue; - } - current_index = index; - - std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); - cursor[1] = 1; - cursor += STRIDE; - } - } - - const GLsizei num_attribs = static_cast((cursor - attribs.data()) / STRIDE); - const GLsizei num_strides = static_cast(current_stream - streams.data()); - glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(), - GL_INTERLEAVED_ATTRIBS); -} - -void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { +void RasterizerOpenGL::BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode) { const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } - if (device.UseAssemblyShaders()) { - SyncTransformFeedback(); - } + program->ConfigureTransformFeedback(); + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); @@ -1100,11 +1016,9 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { } void RasterizerOpenGL::EndTransformFeedback() { - const auto& regs = maxwell3d.regs; - if (regs.tfb_enabled == 0) { - return; + if (maxwell3d.regs.tfb_enabled != 0) { + glEndTransformFeedback(); } - glEndTransformFeedback(); } AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2fdcbe4ba..08f509c19 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -193,12 +193,8 @@ private: /// Syncs vertex instances to match the guest state void SyncVertexInstances(); - /// Syncs transform feedback state to match guest state - /// @note Only valid on assembly shaders - void SyncTransformFeedback(); - /// Begin a transform feedback - void BeginTransformFeedback(GLenum primitive_mode); + void BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode); /// End a transform feedback void EndTransformFeedback(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b4f26dd74..0a0f1324f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -254,6 +254,17 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, } return info; } + +void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -282,7 +293,10 @@ GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); - + graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0); + if (graphics_key.xfb_enabled) { + SetXfbState(graphics_key.xfb_state, regs); + } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& program{pair->second}; if (is_new) { @@ -368,7 +382,8 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( } return std::make_unique( texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - std::move(source_program), std::move(assembly_programs), infos); + std::move(source_program), std::move(assembly_programs), infos, + key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } std::unique_ptr ShaderCache::CreateComputeProgram( -- cgit v1.2.3 From 84feabac881443d27f84f8fec5eba6dc3b13b620 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 18:27:37 -0300 Subject: glasm: Implement forced early Z --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 8 ++++++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index edff04a44..0c2bbf284 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -261,7 +261,8 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } } -void SetupOptions(const IR::Program& program, const Profile& profile, std::string& header) { +void SetupOptions(const IR::Program& program, const Profile& profile, + const RuntimeInfo& runtime_info, std::string& header) { const Info& info{program.info}; const Stage stage{program.stage}; @@ -296,6 +297,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile, std::strin header += "OPTION NV_viewport_array2;"; } } + if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { + header += "OPTION NV_early_fragment_tests;"; + } const auto non_zero_frag_colors{info.stores_frag_color | std::views::drop(1)}; if (std::ranges::find(non_zero_frag_colors, true) != non_zero_frag_colors.end()) { header += "OPTION ARB_draw_buffers;"; @@ -380,7 +384,7 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I Precolor(ctx, program); EmitCode(ctx, program); std::string header{StageHeader(program.stage)}; - SetupOptions(program, profile, header); + SetupOptions(program, profile, runtime_info, header); switch (program.stage) { case Stage::TessellationControl: header += fmt::format("VERTICES_OUT {};", program.invocations); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0a0f1324f..e678b4bb2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -219,8 +219,8 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, return Shader::TessSpacing::Equal; }(); break; - case Shader::Stage::Geometry: - + case Shader::Stage::Fragment: + info.force_early_z = key.early_z != 0; break; default: break; -- cgit v1.2.3 From df406246d9117ba1c428d81ba7466ba0291ece3c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:36:30 -0300 Subject: gl_shader_cache: Improve GLASM error print logic --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e678b4bb2..747a133fb 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -140,13 +140,16 @@ OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { glGenProgramsARB(1, &program.handle); glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, static_cast(code.size()), code.data()); - if (!Settings::values.renderer_debug) { - return program; - } - const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - LOG_CRITICAL(Render_OpenGL, "{}", err); - LOG_INFO(Render_OpenGL, "{}", code); + if (Settings::values.renderer_debug) { + const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + if (std::strstr(err, "error")) { + LOG_CRITICAL(Render_OpenGL, "\n{}", err); + LOG_INFO(Render_OpenGL, "\n{}", code); + } else { + LOG_WARNING(Render_OpenGL, "\n{}", err); + } + } } return program; } -- cgit v1.2.3 From c31521512fd49603ea42c93e2a6eac5d7985cd78 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:46:40 -0300 Subject: gl_shader_cache,glasm: Conditionally use typeless image reads extension --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 6 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 74 +++++++++++----------- src/video_core/renderer_opengl/gl_shader_cache.h | 2 + 3 files changed, 43 insertions(+), 39 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 0c2bbf284..8718cc7ec 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -271,8 +271,7 @@ void SetupOptions(const IR::Program& program, const Profile& profile, "OPTION NV_shader_storage_buffer;" "OPTION NV_gpu_program_fp64;" "OPTION NV_bindless_texture;" - "OPTION ARB_derivative_control;" - "OPTION EXT_shader_image_load_formatted;"; + "OPTION ARB_derivative_control;"; if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; } @@ -297,6 +296,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile, header += "OPTION NV_viewport_array2;"; } } + if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { + header += "OPTION EXT_shader_image_load_formatted;"; + } if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { header += "OPTION NV_early_fragment_tests;"; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 747a133fb..2c0510f11 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -36,42 +36,6 @@ namespace OpenGL { namespace { -// FIXME: Move this somewhere else -const Shader::Profile profile{ - .supported_spirv = 0x00010000, - - .unified_descriptor_binding = false, - .support_descriptor_aliasing = false, - .support_int8 = false, - .support_int16 = false, - .support_vertex_instance_id = true, - .support_float_controls = false, - .support_separate_denorm_behavior = false, - .support_separate_rounding_mode = false, - .support_fp16_denorm_preserve = false, - .support_fp32_denorm_preserve = false, - .support_fp16_denorm_flush = false, - .support_fp32_denorm_flush = false, - .support_fp16_signed_zero_nan_preserve = false, - .support_fp32_signed_zero_nan_preserve = false, - .support_fp64_signed_zero_nan_preserve = false, - .support_explicit_workgroup_layout = false, - .support_vote = true, - .support_viewport_index_layer_non_geometry = true, - .support_viewport_mask = true, - .support_typeless_image_loads = true, - .support_demote_to_helper_invocation = false, - .support_int64_atomics = false, - - .warp_size_potentially_larger_than_guest = true, - .lower_left_origin_mode = true, - - .has_broken_spirv_clamp = true, - .has_broken_unsigned_image_offsets = true, - .has_broken_signed_operations = true, - .ignore_nan_fp_comparisons = true, -}; - using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; @@ -279,7 +243,43 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ - state_tracker_} {} + state_tracker_} { + profile = Shader::Profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.HasNvViewportArray2() || device.HasVertexViewportLayer(), + .support_viewport_mask = true, + .support_typeless_image_loads = device.HasImageLoadFormatted(), + .support_demote_to_helper_invocation = false, + .support_int64_atomics = false, + + .warp_size_potentially_larger_than_guest = true, + .lower_left_origin_mode = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + }; +} ShaderCache::~ShaderCache() = default; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b479d073a..b49cd0ac7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -86,6 +86,8 @@ private: ShaderPools main_pools; std::unordered_map> graphics_cache; std::unordered_map> compute_cache; + + Shader::Profile profile; }; } // namespace OpenGL -- cgit v1.2.3 From 1bccb43cbecdbf069f5c86086670a8d5440408e3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:47:48 -0300 Subject: gl_shader_cache: Conditionally use viewport mask --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2c0510f11..cf03280fa 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -266,7 +266,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_vote = true, .support_viewport_index_layer_non_geometry = device.HasNvViewportArray2() || device.HasVertexViewportLayer(), - .support_viewport_mask = true, + .support_viewport_mask = device.HasNvViewportArray2(), .support_typeless_image_loads = device.HasImageLoadFormatted(), .support_demote_to_helper_invocation = false, .support_int64_atomics = false, -- cgit v1.2.3 From 80884e32701e1e93fded045be4c235ff143d6ea0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 21:24:24 -0300 Subject: gl_graphics_program: Fix texture buffer bindings --- .../renderer_opengl/gl_graphics_program.cpp | 59 +++++++++++++--------- 1 file changed, 35 insertions(+), 24 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 9677a3ed6..7c3d23f85 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "common/cityhash.h" @@ -14,12 +15,24 @@ namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; +using Shader::ImageDescriptor; +using Shader::TextureBufferDescriptor; +using Shader::TextureDescriptor; using Tegra::Texture::TexturePair; using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; +template +u32 AccumulateCount(Range&& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -77,30 +90,25 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff } u32 num_textures{}; u32 num_images{}; - for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { const auto& info{stage_infos[stage]}; - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - for (const auto& desc : info.constant_buffer_descriptors) { - base_uniform_bindings[stage + 1] += desc.count; - } - for (const auto& desc : info.storage_buffers_descriptors) { - base_storage_bindings[stage + 1] += desc.count; - } - for (const auto& desc : info.texture_buffer_descriptors) { - num_texture_buffers[stage] += desc.count; - num_textures += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_image_buffers[stage] += desc.count; - num_images += desc.count; - } - for (const auto& desc : info.texture_descriptors) { - num_textures += desc.count; - } - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); @@ -151,8 +159,8 @@ void GraphicsProgram::Configure(bool is_indexed) { const u32 index_offset{index << desc.size_shift}; const u32 offset{desc.cbuf_offset + index_offset}; const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; - if constexpr (std::is_same_v || - std::is_same_v) { + if constexpr (std::is_same_v || + std::is_same_v) { if (desc.has_secondary) { ASSERT(cbufs[desc.secondary_cbuf_index].enabled); const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; @@ -297,6 +305,9 @@ void GraphicsProgram::Configure(bool is_indexed) { texture_binding += num_texture_buffers[stage]; image_binding += num_image_buffers[stage]; + views_it += num_texture_buffers[stage]; + views_it += num_image_buffers[stage]; + const auto& info{stage_infos[stage]}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { -- cgit v1.2.3 From 40179282137370380387cab2610dcf21bd709efa Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 May 2021 03:24:19 -0300 Subject: gl_shader_cache: Do not flip tessellation on OpenGL --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cf03280fa..ceec83a8a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -159,8 +159,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: - // We have to flip tessellation clockwise for some reason... - info.tess_clockwise = key.tessellation_clockwise == 0; + info.tess_clockwise = key.tessellation_clockwise != 0; info.tess_primitive = [&key] { switch (key.tessellation_primitive) { case Maxwell::TessellationPrimitive::Isolines: -- cgit v1.2.3 From eacf18cce9a05a28f50e916a752c04b0c9337707 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 18:58:28 -0300 Subject: gl_shader_cache: Rename Program abstractions into Pipeline --- src/video_core/CMakeLists.txt | 8 +- .../renderer_opengl/gl_compute_pipeline.cpp | 182 ++++++++++ .../renderer_opengl/gl_compute_pipeline.h | 84 +++++ .../renderer_opengl/gl_compute_program.cpp | 182 ---------- .../renderer_opengl/gl_compute_program.h | 84 ----- .../renderer_opengl/gl_graphics_pipeline.cpp | 402 +++++++++++++++++++++ .../renderer_opengl/gl_graphics_pipeline.h | 118 ++++++ .../renderer_opengl/gl_graphics_program.cpp | 402 --------------------- .../renderer_opengl/gl_graphics_program.h | 118 ------ src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 +- src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 42 +-- src/video_core/renderer_opengl/gl_shader_cache.h | 32 +- src/video_core/renderer_opengl/gl_shader_manager.h | 2 +- 14 files changed, 834 insertions(+), 834 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_compute_pipeline.cpp create mode 100644 src/video_core/renderer_opengl/gl_compute_pipeline.h delete mode 100644 src/video_core/renderer_opengl/gl_compute_program.cpp delete mode 100644 src/video_core/renderer_opengl/gl_compute_program.h create mode 100644 src/video_core/renderer_opengl/gl_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_opengl/gl_graphics_pipeline.h delete mode 100644 src/video_core/renderer_opengl/gl_graphics_program.cpp delete mode 100644 src/video_core/renderer_opengl/gl_graphics_program.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 8250f736c..1ef3a6189 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -67,14 +67,14 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h - renderer_opengl/gl_compute_program.cpp - renderer_opengl/gl_compute_program.h + renderer_opengl/gl_compute_pipeline.cpp + renderer_opengl/gl_compute_pipeline.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h - renderer_opengl/gl_graphics_program.cpp - renderer_opengl/gl_graphics_program.h + renderer_opengl/gl_graphics_pipeline.cpp + renderer_opengl/gl_graphics_pipeline.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp new file mode 100644 index 000000000..700ebd8b8 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -0,0 +1,182 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "video_core/renderer_opengl/gl_compute_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 16; + +size_t ComputePipelineKey::Hash() const noexcept { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof *this)); +} + +bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; +} + +ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, + source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers += desc.count; + } + u32 num_textures = num_texture_buffers; + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + ASSERT(num_textures <= MAX_TEXTURES); + + u32 num_images = num_image_buffers; + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + ASSERT(num_images <= MAX_IMAGES); +} + +void ComputePipeline::Configure() { + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); + ++ssbo_index; + } + texture_cache.SynchronizeComputeDescriptors(); + + std::array image_view_ids; + boost::container::static_vector image_view_indices; + std::array samplers; + std::array textures; + std::array images; + GLsizei sampler_binding{}; + GLsizei texture_binding{}; + GLsizei image_binding{}; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + samplers[sampler_binding++] = 0; + } + } + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers[sampler_binding++] = sampler->Handle(); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + if (assembly_program.handle != 0) { + program_manager.BindComputeAssemblyProgram(assembly_program.handle); + } else { + program_manager.BindProgram(source_program.handle); + } + buffer_cache.UnbindComputeTextureBuffers(); + size_t texbuf_index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texbuf_index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + + buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); + buffer_cache.BindHostComputeBuffers(); + + const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + texture_binding += num_texture_buffers; + image_binding += num_image_buffers; + + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h new file mode 100644 index 000000000..e3b94e2f3 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -0,0 +1,84 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class KeplerCompute; +} + +namespace Shader { +struct Info; +} + +namespace OpenGL { + +class ProgramManager; + +struct ComputePipelineKey { + u64 unique_hash; + u32 shared_memory_size; + std::array workgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputePipelineKey&) const noexcept; + + bool operator!=(const ComputePipelineKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class ComputePipeline { +public: + explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_); + + void Configure(); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::KeplerCompute& kepler_compute; + ProgramManager& program_manager; + + Shader::Info info; + OGLProgram source_program; + OGLAssemblyProgram assembly_program; + + u32 num_texture_buffers{}; + u32 num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp deleted file mode 100644 index ce52a0052..000000000 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/cityhash.h" -#include "video_core/renderer_opengl/gl_compute_program.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" - -namespace OpenGL { - -using Shader::ImageBufferDescriptor; -using Tegra::Texture::TexturePair; -using VideoCommon::ImageId; - -constexpr u32 MAX_TEXTURES = 64; -constexpr u32 MAX_IMAGES = 16; - -size_t ComputeProgramKey::Hash() const noexcept { - return static_cast( - Common::CityHash64(reinterpret_cast(this), sizeof *this)); -} - -bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { - return std::memcmp(this, &rhs, sizeof *this) == 0; -} - -ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, - source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { - for (const auto& desc : info.texture_buffer_descriptors) { - num_texture_buffers += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_image_buffers += desc.count; - } - u32 num_textures = num_texture_buffers; - for (const auto& desc : info.texture_descriptors) { - num_textures += desc.count; - } - ASSERT(num_textures <= MAX_TEXTURES); - - u32 num_images = num_image_buffers; - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; - } - ASSERT(num_images <= MAX_IMAGES); -} - -void ComputeProgram::Configure() { - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); - buffer_cache.UnbindComputeStorageBuffers(); - size_t ssbo_index{}; - for (const auto& desc : info.storage_buffers_descriptors) { - ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, - desc.is_written); - ++ssbo_index; - } - texture_cache.SynchronizeComputeDescriptors(); - - std::array image_view_ids; - boost::container::static_vector image_view_indices; - std::array samplers; - std::array textures; - std::array images; - GLsizei sampler_binding{}; - GLsizei texture_binding{}; - GLsizei image_binding{}; - - const auto& qmd{kepler_compute.launch_description}; - const auto& cbufs{qmd.const_buffer_config}; - const bool via_header_index{qmd.linked_tsc != 0}; - const auto read_handle{[&](const auto& desc, u32 index) { - ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); - const u32 index_offset{index << desc.size_shift}; - const u32 offset{desc.cbuf_offset + index_offset}; - const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; - if constexpr (std::is_same_v || - std::is_same_v) { - if (desc.has_secondary) { - ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); - const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; - const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + - secondary_offset}; - const u32 lhs_raw{gpu_memory.Read(addr)}; - const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - return TexturePair(lhs_raw | rhs_raw, via_header_index); - } - } - return TexturePair(gpu_memory.Read(addr), via_header_index); - }}; - const auto add_image{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); - } - }}; - for (const auto& desc : info.texture_buffer_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); - samplers[sampler_binding++] = 0; - } - } - std::ranges::for_each(info.image_buffer_descriptors, add_image); - for (const auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); - - Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); - samplers[sampler_binding++] = sampler->Handle(); - } - } - std::ranges::for_each(info.image_descriptors, add_image); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - if (assembly_program.handle != 0) { - program_manager.BindComputeAssemblyProgram(assembly_program.handle); - } else { - program_manager.BindProgram(source_program.handle); - } - buffer_cache.UnbindComputeTextureBuffers(); - size_t texbuf_index{}; - const auto add_buffer{[&](const auto& desc) { - constexpr bool is_image = std::is_same_v; - for (u32 i = 0; i < desc.count; ++i) { - bool is_written{false}; - if constexpr (is_image) { - is_written = desc.is_written; - } - ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; - buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format, - is_written, is_image); - ++texbuf_index; - } - }}; - std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); - std::ranges::for_each(info.image_buffer_descriptors, add_buffer); - - buffer_cache.UpdateComputeBuffers(); - - buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); - buffer_cache.BindHostComputeBuffers(); - - const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; - texture_binding += num_texture_buffers; - image_binding += num_image_buffers; - - for (const auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - textures[texture_binding++] = image_view.Handle(desc.type); - } - } - for (const auto& desc : info.image_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); - } - } - if (texture_binding != 0) { - ASSERT(texture_binding == sampler_binding); - glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); - } - if (image_binding != 0) { - glBindImageTextures(0, image_binding, images.data()); - } -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h deleted file mode 100644 index ddb00dc1d..000000000 --- a/src/video_core/renderer_opengl/gl_compute_program.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" -#include "shader_recompiler/shader_info.h" -#include "video_core/renderer_opengl/gl_buffer_cache.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" - -namespace Tegra { -class MemoryManager; -} - -namespace Tegra::Engines { -class KeplerCompute; -} - -namespace Shader { -struct Info; -} - -namespace OpenGL { - -class ProgramManager; - -struct ComputeProgramKey { - u64 unique_hash; - u32 shared_memory_size; - std::array workgroup_size; - - size_t Hash() const noexcept; - - bool operator==(const ComputeProgramKey&) const noexcept; - - bool operator!=(const ComputeProgramKey& rhs) const noexcept { - return !operator==(rhs); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -class ComputeProgram { -public: - explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_); - - void Configure(); - -private: - TextureCache& texture_cache; - BufferCache& buffer_cache; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::KeplerCompute& kepler_compute; - ProgramManager& program_manager; - - Shader::Info info; - OGLProgram source_program; - OGLAssemblyProgram assembly_program; - - u32 num_texture_buffers{}; - u32 num_image_buffers{}; -}; - -} // namespace OpenGL - -namespace std { -template <> -struct hash { - size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { - return k.Hash(); - } -}; -} // namespace std diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp new file mode 100644 index 000000000..32df35202 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -0,0 +1,402 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "common/cityhash.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace OpenGL { +namespace { +using Shader::ImageBufferDescriptor; +using Shader::ImageDescriptor; +using Shader::TextureBufferDescriptor; +using Shader::TextureDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 8; + +template +u32 AccumulateCount(Range&& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + +/// Translates hardware transform feedback indices +/// @param location Hardware location +/// @return Pair of ARB_transform_feedback3 token stream first and third arguments +/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt +std::pair TransformFeedbackEnum(u8 location) { + const u8 index = location / 4; + if (index >= 8 && index <= 39) { + return {GL_GENERIC_ATTRIB_NV, index - 8}; + } + if (index >= 48 && index <= 55) { + return {GL_TEXTURE_COORD_NV, index - 48}; + } + switch (index) { + case 7: + return {GL_POSITION, 0}; + case 40: + return {GL_PRIMARY_COLOR_NV, 0}; + case 41: + return {GL_SECONDARY_COLOR_NV, 0}; + case 42: + return {GL_BACK_PRIMARY_COLOR_NV, 0}; + case 43: + return {GL_BACK_SECONDARY_COLOR_NV, 0}; + } + UNIMPLEMENTED_MSG("index={}", index); + return {GL_POSITION, 0}; +} +} // Anonymous namespace + +size_t GraphicsPipelineKey::Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); +} + +bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; +} + +GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + std::array assembly_programs_, + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( + assembly_programs_)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + for (size_t stage = 0; stage < 5; ++stage) { + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + } + u32 num_textures{}; + u32 num_images{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + const auto& info{stage_infos[stage]}; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + } + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + if (assembly_programs[0].handle != 0 && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } +} + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +void GraphicsPipeline::Configure(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_view_index{}; + GLsizei sampler_binding{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); + buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + samplers[sampler_binding++] = 0; + } + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_binding++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_view_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); + + if (assembly_programs[0].handle != 0) { + program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); + } else { + program_manager.BindProgram(program.handle); + } + const ImageId* views_it{image_view_ids.data()}; + GLsizei texture_binding = 0; + GLsizei image_binding = 0; + std::array textures; + std::array images; + const auto prepare_stage{[&](size_t stage) { + buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); + buffer_cache.BindHostStageBuffers(stage); + + texture_binding += num_texture_buffers[stage]; + image_binding += num_image_buffers[stage]; + + views_it += num_texture_buffers[stage]; + views_it += num_image_buffers[stage]; + + const auto& info{stage_infos[stage]}; + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } +} + +void GraphicsPipeline::GenerateTransformFeedbackState( + const VideoCommon::TransformFeedbackState& xfb_state) { + // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal + // when this is required. + const auto& regs{maxwell3d.regs}; + + GLint* cursor{xfb_attribs.data()}; + GLint* current_stream{xfb_streams.data()}; + + for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { + const auto& layout = regs.tfb_layouts[feedback]; + UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); + if (layout.varying_count == 0) { + continue; + } + *current_stream = static_cast(feedback); + if (current_stream != xfb_streams.data()) { + // When stepping one stream, push the expected token + cursor[0] = GL_NEXT_BUFFER_NV; + cursor[1] = 0; + cursor[2] = 0; + cursor += XFB_ENTRY_STRIDE; + } + ++current_stream; + + const auto& locations = regs.tfb_varying_locs[feedback]; + std::optional current_index; + for (u32 offset = 0; offset < layout.varying_count; ++offset) { + const u8 location = locations[offset]; + const u8 index = location / 4; + + if (current_index == index) { + // Increase number of components of the previous attachment + ++cursor[-2]; + continue; + } + current_index = index; + + std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); + cursor[1] = 1; + cursor += XFB_ENTRY_STRIDE; + } + } + num_xfb_attribs = static_cast((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); + num_xfb_strides = static_cast(current_stream - xfb_streams.data()); +} + +void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h new file mode 100644 index 000000000..62f700cf5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -0,0 +1,118 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/transform_feedback.h" + +namespace OpenGL { + +class ProgramManager; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +struct GraphicsPipelineKey { + std::array unique_hashes; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; + BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + std::array padding; + VideoCommon::TransformFeedbackState xfb_state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineKey&) const noexcept; + + bool operator!=(const GraphicsPipelineKey& rhs) const noexcept { + return !operator==(rhs); + } + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsPipelineKey); + } else { + return offsetof(GraphicsPipelineKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsPipeline { +public: + explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + std::array assembly_programs_, + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state); + + void Configure(bool is_indexed); + + void ConfigureTransformFeedback() const { + if (num_xfb_attribs != 0) { + ConfigureTransformFeedbackImpl(); + } + } + +private: + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + + void ConfigureTransformFeedbackImpl() const; + + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + ProgramManager& program_manager; + StateTracker& state_tracker; + + OGLProgram program; + std::array assembly_programs; + u32 enabled_stages_mask{}; + + std::array stage_infos{}; + std::array base_uniform_bindings{}; + std::array base_storage_bindings{}; + std::array num_texture_buffers{}; + std::array num_image_buffers{}; + + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; + GLsizei num_xfb_attribs{}; + GLsizei num_xfb_strides{}; + std::array xfb_attribs{}; + std::array xfb_streams{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp deleted file mode 100644 index 7c3d23f85..000000000 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ /dev/null @@ -1,402 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include - -#include "common/cityhash.h" -#include "shader_recompiler/shader_info.h" -#include "video_core/renderer_opengl/gl_graphics_program.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" -#include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/texture_cache/texture_cache.h" - -namespace OpenGL { -namespace { -using Shader::ImageBufferDescriptor; -using Shader::ImageDescriptor; -using Shader::TextureBufferDescriptor; -using Shader::TextureDescriptor; -using Tegra::Texture::TexturePair; -using VideoCommon::ImageId; - -constexpr u32 MAX_TEXTURES = 64; -constexpr u32 MAX_IMAGES = 8; - -template -u32 AccumulateCount(Range&& range) { - u32 num{}; - for (const auto& desc : range) { - num += desc.count; - } - return num; -} - -/// Translates hardware transform feedback indices -/// @param location Hardware location -/// @return Pair of ARB_transform_feedback3 token stream first and third arguments -/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt -std::pair TransformFeedbackEnum(u8 location) { - const u8 index = location / 4; - if (index >= 8 && index <= 39) { - return {GL_GENERIC_ATTRIB_NV, index - 8}; - } - if (index >= 48 && index <= 55) { - return {GL_TEXTURE_COORD_NV, index - 48}; - } - switch (index) { - case 7: - return {GL_POSITION, 0}; - case 40: - return {GL_PRIMARY_COLOR_NV, 0}; - case 41: - return {GL_SECONDARY_COLOR_NV, 0}; - case 42: - return {GL_BACK_PRIMARY_COLOR_NV, 0}; - case 43: - return {GL_BACK_SECONDARY_COLOR_NV, 0}; - } - UNIMPLEMENTED_MSG("index={}", index); - return {GL_POSITION, 0}; -} -} // Anonymous namespace - -size_t GraphicsProgramKey::Hash() const noexcept { - return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); -} - -bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { - return std::memcmp(this, &rhs, Size()) == 0; -} - -GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, - const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, - gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, - state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( - assembly_programs_)} { - std::ranges::transform(infos, stage_infos.begin(), - [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - - for (size_t stage = 0; stage < 5; ++stage) { - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; - } - u32 num_textures{}; - u32 num_images{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); - } - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; - - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; - - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); - - if (assembly_programs[0].handle != 0 && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); - } -} - -struct Spec { - static constexpr std::array enabled_stages{true, true, true, true, true}; - static constexpr bool has_storage_buffers = true; - static constexpr bool has_texture_buffers = true; - static constexpr bool has_image_buffers = true; - static constexpr bool has_images = true; -}; - -void GraphicsProgram::Configure(bool is_indexed) { - std::array image_view_ids; - std::array image_view_indices; - std::array samplers; - size_t image_view_index{}; - GLsizei sampler_binding{}; - - texture_cache.SynchronizeGraphicsDescriptors(); - - buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); - buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); - - const auto& regs{maxwell3d.regs}; - const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - const auto config_stage{[&](size_t stage) { - const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); - buffer_cache.UnbindGraphicsStorageBuffers(stage); - if constexpr (Spec::has_storage_buffers) { - size_t ssbo_index{}; - for (const auto& desc : info.storage_buffers_descriptors) { - ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, - desc.cbuf_offset, desc.is_written); - ++ssbo_index; - } - } - const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](const auto& desc, u32 index) { - ASSERT(cbufs[desc.cbuf_index].enabled); - const u32 index_offset{index << desc.size_shift}; - const u32 offset{desc.cbuf_offset + index_offset}; - const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; - if constexpr (std::is_same_v || - std::is_same_v) { - if (desc.has_secondary) { - ASSERT(cbufs[desc.secondary_cbuf_index].enabled); - const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; - const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + - second_offset}; - const u32 lhs_raw{gpu_memory.Read(addr)}; - const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - const u32 raw{lhs_raw | rhs_raw}; - return TexturePair(raw, via_header_index); - } - } - return TexturePair(gpu_memory.Read(addr), via_header_index); - }}; - const auto add_image{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; - } - }}; - if constexpr (Spec::has_texture_buffers) { - for (const auto& desc : info.texture_buffer_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; - samplers[sampler_binding++] = 0; - } - } - } - if constexpr (Spec::has_image_buffers) { - for (const auto& desc : info.image_buffer_descriptors) { - add_image(desc); - } - } - for (const auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; - - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; - samplers[sampler_binding++] = sampler->Handle(); - } - } - if constexpr (Spec::has_images) { - for (const auto& desc : info.image_descriptors) { - add_image(desc); - } - } - }}; - if constexpr (Spec::enabled_stages[0]) { - config_stage(0); - } - if constexpr (Spec::enabled_stages[1]) { - config_stage(1); - } - if constexpr (Spec::enabled_stages[2]) { - config_stage(2); - } - if constexpr (Spec::enabled_stages[3]) { - config_stage(3); - } - if constexpr (Spec::enabled_stages[4]) { - config_stage(4); - } - const std::span indices_span(image_view_indices.data(), image_view_index); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - texture_cache.UpdateRenderTargets(false); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - - ImageId* texture_buffer_index{image_view_ids.data()}; - const auto bind_stage_info{[&](size_t stage) { - size_t index{}; - const auto add_buffer{[&](const auto& desc) { - constexpr bool is_image = std::is_same_v; - for (u32 i = 0; i < desc.count; ++i) { - bool is_written{false}; - if constexpr (is_image) { - is_written = desc.is_written; - } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; - buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format, - is_written, is_image); - ++index; - ++texture_buffer_index; - } - }}; - const Shader::Info& info{stage_infos[stage]}; - buffer_cache.UnbindGraphicsTextureBuffers(stage); - - if constexpr (Spec::has_texture_buffers) { - for (const auto& desc : info.texture_buffer_descriptors) { - add_buffer(desc); - } - } - if constexpr (Spec::has_image_buffers) { - for (const auto& desc : info.image_buffer_descriptors) { - add_buffer(desc); - } - } - for (const auto& desc : info.texture_descriptors) { - texture_buffer_index += desc.count; - } - if constexpr (Spec::has_images) { - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; - } - } - }}; - if constexpr (Spec::enabled_stages[0]) { - bind_stage_info(0); - } - if constexpr (Spec::enabled_stages[1]) { - bind_stage_info(1); - } - if constexpr (Spec::enabled_stages[2]) { - bind_stage_info(2); - } - if constexpr (Spec::enabled_stages[3]) { - bind_stage_info(3); - } - if constexpr (Spec::enabled_stages[4]) { - bind_stage_info(4); - } - buffer_cache.UpdateGraphicsBuffers(is_indexed); - buffer_cache.BindHostGeometryBuffers(is_indexed); - - if (assembly_programs[0].handle != 0) { - program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); - } else { - program_manager.BindProgram(program.handle); - } - const ImageId* views_it{image_view_ids.data()}; - GLsizei texture_binding = 0; - GLsizei image_binding = 0; - std::array textures; - std::array images; - const auto prepare_stage{[&](size_t stage) { - buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); - buffer_cache.BindHostStageBuffers(stage); - - texture_binding += num_texture_buffers[stage]; - image_binding += num_image_buffers[stage]; - - views_it += num_texture_buffers[stage]; - views_it += num_image_buffers[stage]; - - const auto& info{stage_infos[stage]}; - for (const auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - textures[texture_binding++] = image_view.Handle(desc.type); - } - } - for (const auto& desc : info.image_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); - } - } - }}; - if constexpr (Spec::enabled_stages[0]) { - prepare_stage(0); - } - if constexpr (Spec::enabled_stages[1]) { - prepare_stage(1); - } - if constexpr (Spec::enabled_stages[2]) { - prepare_stage(2); - } - if constexpr (Spec::enabled_stages[3]) { - prepare_stage(3); - } - if constexpr (Spec::enabled_stages[4]) { - prepare_stage(4); - } - if (texture_binding != 0) { - ASSERT(texture_binding == sampler_binding); - glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); - } - if (image_binding != 0) { - glBindImageTextures(0, image_binding, images.data()); - } -} - -void GraphicsProgram::GenerateTransformFeedbackState( - const VideoCommon::TransformFeedbackState& xfb_state) { - // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal - // when this is required. - const auto& regs{maxwell3d.regs}; - - GLint* cursor{xfb_attribs.data()}; - GLint* current_stream{xfb_streams.data()}; - - for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; - UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); - if (layout.varying_count == 0) { - continue; - } - *current_stream = static_cast(feedback); - if (current_stream != xfb_streams.data()) { - // When stepping one stream, push the expected token - cursor[0] = GL_NEXT_BUFFER_NV; - cursor[1] = 0; - cursor[2] = 0; - cursor += XFB_ENTRY_STRIDE; - } - ++current_stream; - - const auto& locations = regs.tfb_varying_locs[feedback]; - std::optional current_index; - for (u32 offset = 0; offset < layout.varying_count; ++offset) { - const u8 location = locations[offset]; - const u8 index = location / 4; - - if (current_index == index) { - // Increase number of components of the previous attachment - ++cursor[-2]; - continue; - } - current_index = index; - - std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); - cursor[1] = 1; - cursor += XFB_ENTRY_STRIDE; - } - } - num_xfb_attribs = static_cast((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); - num_xfb_strides = static_cast(current_stream - xfb_streams.data()); -} - -void GraphicsProgram::ConfigureTransformFeedbackImpl() const { - glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, - xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h deleted file mode 100644 index 53a57ede5..000000000 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "common/bit_field.h" -#include "common/common_types.h" -#include "shader_recompiler/shader_info.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_buffer_cache.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/transform_feedback.h" - -namespace OpenGL { - -class ProgramManager; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsProgramKey { - std::array unique_hashes; - union { - u32 raw; - BitField<0, 1, u32> xfb_enabled; - BitField<1, 1, u32> early_z; - BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; - BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; - BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; - BitField<10, 1, u32> tessellation_clockwise; - }; - std::array padding; - VideoCommon::TransformFeedbackState xfb_state; - - size_t Hash() const noexcept; - - bool operator==(const GraphicsProgramKey&) const noexcept; - - bool operator!=(const GraphicsProgramKey& rhs) const noexcept { - return !operator==(rhs); - } - - [[nodiscard]] size_t Size() const noexcept { - if (xfb_enabled != 0) { - return sizeof(GraphicsProgramKey); - } else { - return offsetof(GraphicsProgramKey, padding); - } - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -class GraphicsProgram { -public: - explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, - const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state); - - void Configure(bool is_indexed); - - void ConfigureTransformFeedback() const { - if (num_xfb_attribs != 0) { - ConfigureTransformFeedbackImpl(); - } - } - -private: - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); - - void ConfigureTransformFeedbackImpl() const; - - TextureCache& texture_cache; - BufferCache& buffer_cache; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::Maxwell3D& maxwell3d; - ProgramManager& program_manager; - StateTracker& state_tracker; - - OGLProgram program; - std::array assembly_programs; - u32 enabled_stages_mask{}; - - std::array stage_infos{}; - std::array base_uniform_bindings{}; - std::array base_storage_bindings{}; - std::array num_texture_buffers{}; - std::array num_image_buffers{}; - - static constexpr std::size_t XFB_ENTRY_STRIDE = 3; - GLsizei num_xfb_attribs{}; - GLsizei num_xfb_strides{}; - std::array xfb_attribs{}; - std::array xfb_streams{}; -}; - -} // namespace OpenGL - -namespace std { -template <> -struct hash { - size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept { - return k.Hash(); - } -}; -} // namespace std diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 51ff42ee9..72a6dfd2a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -218,13 +218,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; + GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - program->Configure(is_indexed); + pipeline->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - BeginTransformFeedback(program, primitive_mode); + BeginTransformFeedback(pipeline, primitive_mode); const GLuint base_instance = static_cast(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = @@ -271,7 +271,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute() { - ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; + ComputePipeline* const program{shader_cache.CurrentComputePipeline()}; if (!program) { return; } @@ -996,7 +996,7 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } -void RasterizerOpenGL::BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode) { +void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) { const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 08f509c19..afd43b2ee 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -194,7 +194,7 @@ private: void SyncVertexInstances(); /// Begin a transform feedback - void BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode); + void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode); /// End a transform feedback void EndTransformFeedback(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index ceec83a8a..33757938a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -152,7 +152,7 @@ GLenum AssemblyStage(size_t stage_index) { return GL_NONE; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program) { UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); @@ -282,7 +282,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo ShaderCache::~ShaderCache() = default; -GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { +GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { return nullptr; } @@ -302,18 +302,18 @@ GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& program{pair->second}; if (is_new) { - program = CreateGraphicsProgram(); + program = CreateGraphicsPipeline(); } return program.get(); } -ComputeProgram* ShaderCache::CurrentComputeProgram() { +ComputePipeline* ShaderCache::CurrentComputePipeline() { const VideoCommon::ShaderInfo* const shader{ComputeShader()}; if (!shader) { return nullptr; } const auto& qmd{kepler_compute.launch_description}; - const ComputeProgramKey key{ + const ComputePipelineKey key{ .unique_hash = shader->unique_hash, .shared_memory_size = qmd.shared_alloc, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, @@ -323,20 +323,20 @@ ComputeProgram* ShaderCache::CurrentComputeProgram() { if (!is_new) { return pipeline.get(); } - pipeline = CreateComputeProgram(key, shader); + pipeline = CreateComputePipeline(key, shader); return pipeline.get(); } -std::unique_ptr ShaderCache::CreateGraphicsProgram() { +std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GraphicsEnvironments environments; GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); + return CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true); } -std::unique_ptr ShaderCache::CreateGraphicsProgram( - ShaderPools& pools, const GraphicsProgramKey& key, std::span envs, +std::unique_ptr ShaderCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{0}; @@ -382,27 +382,27 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( if (!device.UseAssemblyShaders()) { LinkProgram(source_program.handle); } - return std::make_unique( + return std::make_unique( texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } -std::unique_ptr ShaderCache::CreateComputeProgram( - const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { +std::unique_ptr ShaderCache::CreateComputePipeline( + const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - return CreateComputeProgram(main_pools, key, env, true); + return CreateComputePipeline(main_pools, key, env, true); } -std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& pools, - const ComputeProgramKey& key, - Shader::Environment& env, - bool build_in_parallel) { +std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, + const ComputePipelineKey& key, + Shader::Environment& env, + bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -418,9 +418,9 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p AddShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } - return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, - program_manager, program.info, - std::move(source_program), std::move(asm_program)); + return std::make_unique(texture_cache, buffer_cache, gpu_memory, + kepler_compute, program_manager, program.info, + std::move(source_program), std::move(asm_program)); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b49cd0ac7..a56559ea9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -15,8 +15,8 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_compute_program.h" -#include "video_core/renderer_opengl/gl_graphics_program.h" +#include "video_core/renderer_opengl/gl_compute_pipeline.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -55,24 +55,24 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); - [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); - [[nodiscard]] ComputeProgram* CurrentComputeProgram(); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: - std::unique_ptr CreateGraphicsProgram(); + std::unique_ptr CreateGraphicsPipeline(); - std::unique_ptr CreateGraphicsProgram( - ShaderPools& pools, const GraphicsProgramKey& key, + std::unique_ptr CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel); - std::unique_ptr CreateComputeProgram(const ComputeProgramKey& key, - const VideoCommon::ShaderInfo* shader); + std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, + const VideoCommon::ShaderInfo* shader); - std::unique_ptr CreateComputeProgram(ShaderPools& pools, - const ComputeProgramKey& key, - Shader::Environment& env, - bool build_in_parallel); + std::unique_ptr CreateComputePipeline(ShaderPools& pools, + const ComputePipelineKey& key, + Shader::Environment& env, + bool build_in_parallel); Core::Frontend::EmuWindow& emu_window; const Device& device; @@ -81,11 +81,11 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; - GraphicsProgramKey graphics_key{}; + GraphicsPipelineKey graphics_key{}; ShaderPools main_pools; - std::unordered_map> graphics_cache; - std::unordered_map> compute_cache; + std::unordered_map> graphics_cache; + std::unordered_map> compute_cache; Shader::Profile profile; }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 5ec57d707..88b734bcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -9,8 +9,8 @@ #include -#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { -- cgit v1.2.3 From a49532c8eb29807814214ab326ff970f5a964a03 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 18:58:52 -0300 Subject: video_core,shader: Clang-format fixes --- src/shader_recompiler/frontend/ir/patch.cpp | 2 +- src/shader_recompiler/frontend/ir/post_order.h | 2 +- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 +++++++++---- src/video_core/renderer_vulkan/vk_scheduler.h | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp index 1f770bc48..4c956a970 100644 --- a/src/shader_recompiler/frontend/ir/patch.cpp +++ b/src/shader_recompiler/frontend/ir/patch.cpp @@ -2,8 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "shader_recompiler/frontend/ir/patch.h" #include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/patch.h" namespace Shader::IR { diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h index 58a0467a0..07bfbadc3 100644 --- a/src/shader_recompiler/frontend/ir/post_order.h +++ b/src/shader_recompiler/frontend/ir/post_order.h @@ -4,8 +4,8 @@ #pragma once -#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/abstract_syntax_list.h" +#include "shader_recompiler/frontend/ir/basic_block.h" namespace Shader::IR { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7e39b65bd..d50647ba7 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,14 +97,19 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), + telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), + gpu(gpu_), + library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(gpu), scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), + memory_allocator(device, false), + state_tracker(gpu), + scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 6600fb142..cf39a2363 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -6,11 +6,11 @@ #include #include -#include #include #include #include #include +#include #include "common/alignment.h" #include "common/common_types.h" -- cgit v1.2.3 From a41b2ed3917f9ca5af30773e4671f4829380dceb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 20:39:55 -0300 Subject: gl_shader_cache: Add disk shader cache --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 113 +++++++++++++++++++-- src/video_core/renderer_opengl/gl_shader_cache.h | 10 +- 3 files changed, 116 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 72a6dfd2a..eec01e8c2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -140,7 +140,9 @@ void RasterizerOpenGL::SyncVertexInstances() { } void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) {} + const VideoCore::DiskResourceLoadCallback& callback) { + shader_cache.LoadDiskResources(title_id, stop_loading, callback); +} void RasterizerOpenGL::Clear() { MICROPROFILE_SCOPE(OpenGL_Clears); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 33757938a..3aa5ac31d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -3,17 +3,19 @@ // Refer to the license.txt file included. #include +#include #include #include -#include #include #include -#include #include "common/alignment.h" #include "common/assert.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/thread_worker.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" @@ -40,6 +42,8 @@ using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; template @@ -154,8 +158,6 @@ GLenum AssemblyStage(size_t stage_index) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program) { - UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); - Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: @@ -282,6 +284,89 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo ShaderCache::~ShaderCache() = default; +void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; + } + auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + auto base_dir{shader_dir / "new_opengl"}; + auto transferable_dir{base_dir / "transferable"}; + auto precompiled_dir{base_dir / "precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + + struct Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; + }; + Common::StatefulThreadWorker workers( + std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); + + struct { + std::mutex mutex; + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + workers.QueueWork( + [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { + ctx->pools.ReleaseContents(); + auto pipeline{CreateComputePipeline(ctx->pools, key, env, false)}; + + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector envs) { + GraphicsPipelineKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + workers.QueueWork( + [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { + boost::container::static_vector env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + ctx->pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; + + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, shader_cache_filename, load_compute, load_graphics); + + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); + + workers.WaitForRequests(); +} + GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { return nullptr; @@ -332,7 +417,18 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - return CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; + if (shader_cache_filename.empty()) { + return pipeline; + } + boost::container::static_vector env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] != 0) { + env_ptrs.push_back(&environments.envs[index]); + } + } + VideoCommon::SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + return pipeline; } std::unique_ptr ShaderCache::CreateGraphicsPipeline( @@ -396,7 +492,12 @@ std::unique_ptr ShaderCache::CreateComputePipeline( env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - return CreateComputePipeline(main_pools, key, env, true); + auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; + if (!shader_cache_filename.empty()) { + VideoCommon::SerializePipeline(key, std::array{&env}, + shader_cache_filename); + } + return pipeline; } std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index a56559ea9..16175318b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,6 +5,8 @@ #pragma once #include +#include +#include #include #include @@ -23,10 +25,6 @@ namespace Tegra { class MemoryManager; } -namespace Core::Frontend { -class EmuWindow; -} - namespace OpenGL { class Device; @@ -55,6 +53,9 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); [[nodiscard]] ComputePipeline* CurrentComputePipeline(); @@ -88,6 +89,7 @@ private: std::unordered_map> compute_cache; Shader::Profile profile; + std::filesystem::path shader_cache_filename; }; } // namespace OpenGL -- cgit v1.2.3 From adb591a757ccb289634920d51cf519b515ca32b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 18:32:59 -0300 Subject: glasm: Use storage buffers instead of global memory when possible --- src/shader_recompiler/CMakeLists.txt | 1 - .../backend/glasm/emit_context.cpp | 13 +- src/shader_recompiler/backend/glasm/emit_glasm.h | 5 +- .../backend/glasm/emit_glasm_atomic.cpp | 351 ------------------- .../backend/glasm/emit_glasm_memory.cpp | 380 ++++++++++++++++++++- src/shader_recompiler/profile.h | 3 + src/video_core/renderer_opengl/gl_buffer_cache.cpp | 26 +- src/video_core/renderer_opengl/gl_buffer_cache.h | 6 + .../renderer_opengl/gl_compute_pipeline.cpp | 42 ++- .../renderer_opengl/gl_compute_pipeline.h | 12 +- src/video_core/renderer_opengl/gl_device.cpp | 18 +- src/video_core/renderer_opengl/gl_device.h | 6 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 19 +- .../renderer_opengl/gl_graphics_pipeline.h | 12 +- src/video_core/renderer_opengl/gl_rasterizer.cpp | 13 +- src/video_core/renderer_opengl/gl_rasterizer.h | 3 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 30 +- 17 files changed, 503 insertions(+), 437 deletions(-) delete mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index becdb7d54..d6d8e5f59 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -4,7 +4,6 @@ add_library(shader_recompiler STATIC backend/glasm/emit_context.h backend/glasm/emit_glasm.cpp backend/glasm/emit_glasm.h - backend/glasm/emit_glasm_atomic.cpp backend/glasm/emit_glasm_barriers.cpp backend/glasm/emit_glasm_bitwise_conversion.cpp backend/glasm/emit_glasm_composite.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index b5b0e2204..e18526816 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLASM { namespace { @@ -40,13 +41,21 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); ++cbuf_index; } + u32 ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { if (desc.count != 1) { throw NotImplementedException("Storage buffer descriptor array"); } + if (runtime_info.glasm_use_storage_buffers) { + Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer); + ++bindings.storage_buffer; + ++ssbo_index; + } } - if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { - Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + if (!runtime_info.glasm_use_storage_buffers) { + if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { + Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + } } stage = program.stage; switch (program.stage) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index 3d02d873e..3df32a4a6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -15,9 +15,10 @@ namespace Shader::Backend::GLASM { [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings); -[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, IR::Program& program) { +[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program) { Bindings binding; - return EmitGLASM(profile, {}, program, binding); + return EmitGLASM(profile, runtime_info, program, binding); } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp deleted file mode 100644 index e72b252a3..000000000 --- a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp +++ /dev/null @@ -1,351 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "shader_recompiler/backend/glasm/emit_context.h" -#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" -#include "shader_recompiler/frontend/ir/value.h" - -namespace Shader::Backend::GLASM { -namespace { -void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, - std::string_view then_expr, std::string_view else_expr = {}) { - // Operate on bindless SSBO, call the expression with bounds checking - // address = c[binding].xy - // length = c[binding].z - const u32 sb_binding{binding.U32()}; - ctx.Add("PK64.U DC,c[{}];" // pointer = address - "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset) - "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset - "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length - sb_binding, offset, offset, sb_binding); - if (else_expr.empty()) { - ctx.Add("IF NE.x;{}ENDIF;", then_expr); - } else { - ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr); - } -} - -template -void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, - ValueType value, std::string_view operation, std::string_view size) { - const Register ret{ctx.reg_alloc.Define(inst)}; - StorageOp(ctx, binding, offset, - fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); -} -} // namespace - -void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarS32 value) { - ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarS32 value) { - ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - Register value) { - ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "ADD", "U32"); -} - -void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarS32 value) { - Atom(ctx, inst, binding, offset, value, "MIN", "S32"); -} - -void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "MIN", "U32"); -} - -void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarS32 value) { - Atom(ctx, inst, binding, offset, value, "MAX", "S32"); -} - -void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "MAX", "U32"); -} - -void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); -} - -void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); -} - -void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "AND", "U32"); -} - -void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "OR", "U32"); -} - -void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "XOR", "U32"); -} - -void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); -} - -void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "ADD", "U64"); -} - -void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MIN", "S64"); -} - -void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MIN", "U64"); -} - -void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MAX", "S64"); -} - -void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MAX", "U64"); -} - -void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "AND", "U64"); -} - -void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "OR", "U64"); -} - -void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "XOR", "U64"); -} - -void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); -} - -void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarF32 value) { - Atom(ctx, inst, binding, offset, value, "ADD", "F32"); -} - -void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); -} - -void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); -} - -void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); -} - -void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicIAdd32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMin32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicUMin32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMax32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicUMax32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicInc32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicDec32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAnd32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicOr32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicXor32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicExchange32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicIAdd64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMin64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicUMin64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMax64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicUMax64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicInc64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicDec64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAnd64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicOr64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicXor64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicExchange64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAddF32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAddF16x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAddF32x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicMinF16x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicMinF32x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicMaxF16x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicMaxF32x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} -} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index 26b03587e..90dbb80d2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLASM { namespace { @@ -29,7 +30,7 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, } } -void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_expr, +void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr, std::string_view else_expr = {}) { const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()}; for (size_t index = 0; index < num_buffers; ++index) { @@ -44,14 +45,22 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 1 "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 1 "AND.U.CC RC.x,RC.x,RC.y;" - "IF NE.x;" // a && b - "SUB.U64 DC.x,{}.x,DC.x;" // offset = input_addr - ssbo_addr - "PK64.U DC.y,c[{}];" // host_ssbo = cbuf - "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset - "{}" - "ELSE;", + "IF NE.x;" // a && b + "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, - address, address, index, then_expr); + address, address); + if (pointer_based) { + ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf + "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset + "{}" + "ELSE;", + index, expr); + } else { + ctx.Add("CVT.U32.U64 RC.x,DC.x;" + "{},ssbo{}[RC.x];" + "ELSE;", + expr, index); + } } if (!else_expr.empty()) { ctx.Add("{}", else_expr); @@ -64,25 +73,54 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e template void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, std::string_view size) { - StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); + } } void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, std::string_view size) { const Register ret{ctx.reg_alloc.Define(inst)}; - StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), - fmt::format("MOV.U {},{{0,0,0,0}};", ret)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.U {},{{0,0,0,0}};", ret)); + } } template void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) { - GlobalStorageOp(ctx, address, fmt::format("STORE.{} {},DC.x;", size, value)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value)); + } } void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) { const Register ret{ctx.reg_alloc.Define(inst)}; - GlobalStorageOp(ctx, address, fmt::format("LOAD.{} {},DC.x;", size, ret), - fmt::format("MOV.S {},0;", ret)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.S {},0;", ret)); + } +} + +template +void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + ValueType value, std::string_view operation, std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(), + offset); + } else { + StorageOp(ctx, binding, offset, + fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); + } } } // Anonymous namespace @@ -212,4 +250,318 @@ void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 o Write(ctx, binding, offset, value, "U32X4"); } +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value) { + ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value) { + ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value) { + ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U32"); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S32"); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U32"); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S32"); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U32"); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "AND", "U32"); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "OR", "U32"); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U32"); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U64"); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S64"); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U64"); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S64"); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U64"); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "AND", "U64"); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "OR", "U64"); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U64"); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarF32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F32"); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); +} + +void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); +} + +void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); +} + +void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c46452c3d..f8913bf14 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -111,7 +111,10 @@ struct RuntimeInfo { std::optional alpha_test_func; float alpha_test_reference{}; + // Static y negate value bool y_negate{}; + // Use storage buffers instead of global pointers on GLASM + bool glasm_use_storage_buffers{}; std::vector xfb_varyings; }; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2d0ef1307..334ed470f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -195,7 +195,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + const GLuint base_binding = graphics_base_storage_bindings[stage]; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), + static_cast(offset), static_cast(size)); + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast(size), @@ -204,17 +209,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast(&ssbo)); - } else { - const GLuint base_binding = graphics_base_storage_bindings[stage]; - const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), - static_cast(offset), static_cast(size)); } } void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + if (size != 0) { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), + static_cast(offset), static_cast(size)); + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); + } + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast(size), @@ -223,11 +230,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, reinterpret_cast(&ssbo)); - } else if (size == 0) { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); - } else { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), - static_cast(offset), static_cast(size)); } } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 4986c65fd..bc16abafb 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -147,6 +147,10 @@ public: image_handles = image_handles_; } + void SetEnableStorageBuffers(bool use_storage_buffers_) { + use_storage_buffers = use_storage_buffers_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -160,6 +164,8 @@ private: bool use_assembly_shaders = false; bool has_unified_vertex_buffers = false; + bool use_storage_buffers = false; + u32 max_attributes = 0; std::array graphics_base_uniform_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 700ebd8b8..5cf5f97a9 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -17,6 +17,15 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 16; +template +u32 AccumulateCount(const Range& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + size_t ComputePipelineKey::Hash() const noexcept { return static_cast( Common::CityHash64(reinterpret_cast(this), sizeof *this)); @@ -26,31 +35,31 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep return std::memcmp(this, &rhs, sizeof *this) == 0; } -ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, +ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, OGLProgram source_program_, OGLAssemblyProgram assembly_program_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { - for (const auto& desc : info.texture_buffer_descriptors) { - num_texture_buffers += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_image_buffers += desc.count; - } - u32 num_textures = num_texture_buffers; - for (const auto& desc : info.texture_descriptors) { - num_textures += desc.count; - } + + num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); + num_image_buffers = AccumulateCount(info.image_buffer_descriptors); + + const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; ASSERT(num_textures <= MAX_TEXTURES); - u32 num_images = num_image_buffers; - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; - } + const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; ASSERT(num_images <= MAX_IMAGES); + + const bool is_glasm{assembly_program.handle != 0}; + const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; + use_storage_buffers = + !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory = !use_storage_buffers && + std::ranges::any_of(info.storage_buffers_descriptors, + [](const auto& desc) { return desc.is_written; }); } void ComputePipeline::Configure() { @@ -150,6 +159,7 @@ void ComputePipeline::Configure() { buffer_cache.UpdateComputeBuffers(); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); buffer_cache.BindHostComputeBuffers(); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index e3b94e2f3..dd6b62ef2 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -28,6 +28,7 @@ struct Info; namespace OpenGL { +class Device; class ProgramManager; struct ComputePipelineKey { @@ -49,14 +50,18 @@ static_assert(std::is_trivially_constructible_v); class ComputePipeline { public: - explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, + explicit ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, OGLProgram source_program_, OGLAssemblyProgram assembly_program_); void Configure(); + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + private: TextureCache& texture_cache; BufferCache& buffer_cache; @@ -70,6 +75,9 @@ private: u32 num_texture_buffers{}; u32 num_image_buffers{}; + + bool use_storage_buffers{}; + bool writes_global_memory{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 18bbc4c1f..01da2bb57 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -135,13 +135,13 @@ Device::Device() { "Beta driver 443.24 is known to have issues. There might be performance issues."); disable_fast_buffer_sub_data = true; } - max_uniform_buffers = BuildMaxUniformBuffers(); uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); max_compute_shared_memory_size = GetInteger(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); + max_glasm_storage_buffer_blocks = GetInteger(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; has_shader_ballot = GLAD_GL_ARB_shader_ballot; @@ -236,22 +236,6 @@ std::string Device::GetVendorName() const { return vendor_name; } -Device::Device(std::nullptr_t) { - max_uniform_buffers.fill(std::numeric_limits::max()); - uniform_buffer_alignment = 4; - shader_storage_alignment = 4; - max_vertex_attributes = 16; - max_varyings = 15; - max_compute_shared_memory_size = 0x10000; - has_warp_intrinsics = true; - has_shader_ballot = true; - has_vertex_viewport_layer = true; - has_image_load_formatted = true; - has_texture_shadow_lod = true; - has_variable_aoffi = true; - has_depth_buffer_float = true; -} - bool Device::TestVariableAoffi() { return TestProgram(R"(#version 430 core // This is a unit test, please ignore me on apitrace bug reports. diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 152a3acd3..d67f5693c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -13,7 +13,6 @@ namespace OpenGL { class Device { public: explicit Device(); - explicit Device(std::nullptr_t); [[nodiscard]] std::string GetVendorName() const; @@ -41,6 +40,10 @@ public: return max_compute_shared_memory_size; } + u32 GetMaxGLASMStorageBufferBlocks() const { + return max_glasm_storage_buffer_blocks; + } + bool HasWarpIntrinsics() const { return has_warp_intrinsics; } @@ -124,6 +127,7 @@ private: u32 max_vertex_attributes{}; u32 max_varyings{}; u32 max_compute_shared_memory_size{}; + u32 max_glasm_storage_buffer_blocks{}; bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 32df35202..19d85c482 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -25,7 +25,7 @@ constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; template -u32 AccumulateCount(Range&& range) { +u32 AccumulateCount(const Range& range) { u32 num{}; for (const auto& desc : range) { num += desc.count; @@ -70,8 +70,8 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc return std::memcmp(this, &rhs, Size()) == 0; } -GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, +GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, @@ -90,6 +90,7 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu } u32 num_textures{}; u32 num_images{}; + u32 num_storage_buffers{}; for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { const auto& info{stage_infos[stage]}; if (stage < 4) { @@ -109,11 +110,20 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu num_textures += AccumulateCount(info.texture_descriptors); num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); - if (assembly_programs[0].handle != 0 && xfb_state) { + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + + if (assembly_shaders && xfb_state) { GenerateTransformFeedbackState(*xfb_state); } } @@ -137,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 62f700cf5..c1113e180 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -20,6 +20,7 @@ namespace OpenGL { +class Device; class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -60,8 +61,8 @@ static_assert(std::is_trivially_constructible_v); class GraphicsPipeline { public: - explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, + explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, @@ -77,6 +78,10 @@ public: } } + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + private: void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); @@ -99,6 +104,9 @@ private: std::array num_texture_buffers{}; std::array num_image_buffers{}; + bool use_storage_buffers{}; + bool writes_global_memory{}; + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; GLsizei num_xfb_strides{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index eec01e8c2..5d4e80364 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -268,19 +268,21 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { EndTransformFeedback(); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); gpu.TickWork(); } void RasterizerOpenGL::DispatchCompute() { - ComputePipeline* const program{shader_cache.CurrentComputePipeline()}; - if (!program) { + ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; + if (!pipeline) { return; } - program->Configure(); + pipeline->Configure(); const auto& qmd{kepler_compute.launch_description}; glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -449,9 +451,8 @@ void RasterizerOpenGL::FlushCommands() { // Make sure memory stored from the previous GL command stream is visible // This is only needed on assembly shaders where we write to GPU memory with raw pointers - // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used - // and prefer using NV_shader_storage_buffer_object when possible - if (Settings::values.use_assembly_shaders.GetValue()) { + if (has_written_global_memory) { + has_written_global_memory = false; glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); } glFlush(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index afd43b2ee..d0397b745 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -225,7 +225,8 @@ private: std::array image_handles{}; /// Number of commands queued to the OpenGL driver. Resetted on flush. - std::size_t num_queued_commands = 0; + size_t num_queued_commands = 0; + bool has_written_global_memory = false; u32 last_clip_distance_mask = 0; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3aa5ac31d..287f497b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -157,7 +157,8 @@ GLenum AssemblyStage(size_t stage_index) { } Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, - const Shader::IR::Program& program) { + const Shader::IR::Program& program, + bool glasm_use_storage_buffers) { Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: @@ -220,6 +221,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, info.input_topology = Shader::InputTopology::TrianglesAdjacency; break; } + info.glasm_use_storage_buffers = glasm_use_storage_buffers; return info; } @@ -435,7 +437,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); - size_t env_index{0}; + size_t env_index{}; + u32 total_storage_buffers{}; std::array programs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { @@ -447,7 +450,14 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + + for (const auto& desc : programs[index].info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } } + const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; + const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; + std::array infos{}; OGLProgram source_program; @@ -466,7 +476,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)}; if (device.UseAssemblyShaders()) { const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); @@ -479,7 +489,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( LinkProgram(source_program.handle); } return std::make_unique( - texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } @@ -508,10 +518,18 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + + u32 num_storage_buffers{}; + for (const auto& desc : program.info.storage_buffers_descriptors) { + num_storage_buffers += desc.count; + } + Shader::RuntimeInfo info; + info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + OGLAssemblyProgram asm_program; OGLProgram source_program; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program)}; + const std::string code{EmitGLASM(profile, info, program)}; asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); } else { const std::vector code{EmitSPIRV(profile, program)}; @@ -519,7 +537,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& AddShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } - return std::make_unique(texture_cache, buffer_cache, gpu_memory, + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, kepler_compute, program_manager, program.info, std::move(source_program), std::move(asm_program)); } -- cgit v1.2.3 From 3b595fe8b28001eed4a936e2a7b465bd67dcc4b7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 16:47:49 -0300 Subject: glasm: Prepare XFB from state instead of global registers --- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 19d85c482..38ec88b13 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -362,13 +362,11 @@ void GraphicsPipeline::GenerateTransformFeedbackState( const VideoCommon::TransformFeedbackState& xfb_state) { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. - const auto& regs{maxwell3d.regs}; - GLint* cursor{xfb_attribs.data()}; GLint* current_stream{xfb_streams.data()}; for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; + const auto& layout = xfb_state.layouts[feedback]; UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); if (layout.varying_count == 0) { continue; @@ -383,7 +381,7 @@ void GraphicsPipeline::GenerateTransformFeedbackState( } ++current_stream; - const auto& locations = regs.tfb_varying_locs[feedback]; + const auto& locations = xfb_state.varyings[feedback]; std::optional current_index; for (u32 offset = 0; offset < layout.varying_count; ++offset) { const u8 location = locations[offset]; -- cgit v1.2.3 From b7764c3a796e53ac74009bc7d7cd153c64b6d743 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 17:51:00 -0300 Subject: shader: Handle host exceptions --- src/shader_recompiler/exception.h | 40 ++++++++++++++++---- src/shader_recompiler/frontend/maxwell/opcodes.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../frontend/maxwell/translate/translate.cpp | 13 +++++-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 43 +++++++++++++--------- src/video_core/renderer_opengl/gl_shader_cache.h | 5 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 35 ++++++++++++------ 8 files changed, 98 insertions(+), 45 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h index 6fe620801..013d7b1bf 100644 --- a/src/shader_recompiler/exception.h +++ b/src/shader_recompiler/exception.h @@ -5,38 +5,62 @@ #pragma once #include +#include +#include #include #include namespace Shader { -class LogicError : public std::logic_error { +class Exception : public std::exception { +public: + explicit Exception(std::string message_) noexcept : message{std::move(message_)} {} + + const char* what() const override { + return message.c_str(); + } + + void Prepend(std::string_view prepend) { + message.insert(0, prepend); + } + + void Append(std::string_view append) { + message += append; + } + +private: + std::string message; +}; + +class LogicError : public Exception { public: template LogicError(const char* message, Args&&... args) - : std::logic_error{fmt::format(message, std::forward(args)...)} {} + : Exception{fmt::format(message, std::forward(args)...)} {} }; -class RuntimeError : public std::runtime_error { +class RuntimeError : public Exception { public: template RuntimeError(const char* message, Args&&... args) - : std::runtime_error{fmt::format(message, std::forward(args)...)} {} + : Exception{fmt::format(message, std::forward(args)...)} {} }; -class NotImplementedException : public std::logic_error { +class NotImplementedException : public Exception { public: template NotImplementedException(const char* message, Args&&... args) - : std::logic_error{fmt::format(message, std::forward(args)...)} {} + : Exception{fmt::format(message, std::forward(args)...)} { + Append(" is not implemented"); + } }; -class InvalidArgument : public std::invalid_argument { +class InvalidArgument : public Exception { public: template InvalidArgument(const char* message, Args&&... args) - : std::invalid_argument{fmt::format(message, std::forward(args)...)} {} + : Exception{fmt::format(message, std::forward(args)...)} {} }; } // namespace Shader diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp index 12ddf2ac9..ccc40c20c 100644 --- a/src/shader_recompiler/frontend/maxwell/opcodes.cpp +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp @@ -10,7 +10,7 @@ namespace Shader::Maxwell { namespace { constexpr std::array NAME_TABLE{ -#define INST(name, cute, encode) #cute, +#define INST(name, cute, encode) cute, #include "maxwell.inc" #undef INST }; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ccdab1dad..900fc7ab1 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -7,6 +7,7 @@ #include #include +#include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" #include "shader_recompiler/frontend/maxwell/program.h" diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 0f4e7a251..8e3c4c5d5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -30,16 +30,21 @@ void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 locat TranslatorVisitor visitor{env, *block}; for (Location pc = location_begin; pc != location_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; - const Opcode opcode{Decode(insn)}; - switch (opcode) { + try { + const Opcode opcode{Decode(insn)}; + switch (opcode) { #define INST(name, cute, mask) \ case Opcode::name: \ Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \ break; #include "shader_recompiler/frontend/maxwell/maxwell.inc" #undef OPCODE - default: - throw LogicError("Invalid opcode {}", opcode); + default: + throw LogicError("Invalid opcode {}", opcode); + } + } catch (Exception& exception) { + exception.Prepend(fmt::format("Translate {}: ", Decode(insn))); + throw; } } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 5d4e80364..54696d97d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -221,7 +221,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; - + if (!pipeline) { + return; + } std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; pipeline->Configure(is_indexed); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 287f497b5..7d2ec4efa 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -45,6 +45,7 @@ using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +using VideoCommon::SerializePipeline; template auto MakeSpan(Container& container) { @@ -327,10 +328,11 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, workers.QueueWork( [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { ctx->pools.ReleaseContents(); - auto pipeline{CreateComputePipeline(ctx->pools, key, env, false)}; - + auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -348,10 +350,11 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; - + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs))}; std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + graphics_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -419,8 +422,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; - if (shader_cache_filename.empty()) { + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span())}; + if (!pipeline || shader_cache_filename.empty()) { return pipeline; } boost::container::static_vector env_ptrs; @@ -429,13 +432,13 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { env_ptrs.push_back(&environments.envs[index]); } } - VideoCommon::SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); return pipeline; } std::unique_ptr ShaderCache::CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, - bool build_in_parallel) { + ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{}; u32 total_storage_buffers{}; @@ -492,6 +495,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; } std::unique_ptr ShaderCache::CreateComputePipeline( @@ -502,18 +509,17 @@ std::unique_ptr ShaderCache::CreateComputePipeline( env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; - if (!shader_cache_filename.empty()) { - VideoCommon::SerializePipeline(key, std::array{&env}, - shader_cache_filename); + auto pipeline{CreateComputePipeline(main_pools, key, env)}; + if (!pipeline || shader_cache_filename.empty()) { + return pipeline; } + SerializePipeline(key, std::array{&env}, shader_cache_filename); return pipeline; } std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env, - bool build_in_parallel) { + Shader::Environment& env) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -540,6 +546,9 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, kepler_compute, program_manager, program.info, std::move(source_program), std::move(asm_program)); +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 16175318b..cf74d34e4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -65,15 +65,14 @@ private: std::unique_ptr CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs, bool build_in_parallel); + std::span envs); std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader); std::unique_ptr CreateComputePipeline(ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env, - bool build_in_parallel); + Shader::Environment& env); Core::Frontend::EmuWindow& emu_window; const Device& device; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f86bf9c30..b6998e37c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -303,6 +303,9 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { if (is_new) { pipeline = CreateGraphicsPipeline(); } + if (!pipeline) { + return nullptr; + } if (current_pipeline) { current_pipeline->AddTransition(pipeline.get()); } @@ -362,9 +365,10 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, env, false)}; - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -405,7 +409,7 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading std::unique_ptr PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, - std::span envs, bool build_in_parallel) { + std::span envs, bool build_in_parallel) try { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; @@ -458,6 +462,10 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( return std::make_unique( maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { @@ -466,7 +474,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; - if (pipeline_cache_filename.empty()) { + if (!pipeline || pipeline_cache_filename.empty()) { return pipeline; } serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { @@ -477,7 +485,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env_ptrs.push_back(&envs[index]); } } - VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); + SerializePipeline(key, env_ptrs, pipeline_cache_filename); }); return pipeline; } @@ -491,18 +499,19 @@ std::unique_ptr PipelineCache::CreateComputePipeline( main_pools.ReleaseContents(); auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; - if (!pipeline_cache_filename.empty()) { - serialization_thread.QueueWork([this, key, env = std::move(env)] { - VideoCommon::SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); - }); + if (!pipeline || pipeline_cache_filename.empty()) { + return pipeline; } + serialization_thread.QueueWork([this, key, env = std::move(env)] { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); + }); return pipeline; } std::unique_ptr PipelineCache::CreateComputePipeline( ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, - bool build_in_parallel) { + bool build_in_parallel) try { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -517,6 +526,10 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, thread_worker, program.info, std::move(spv_module)); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } } // namespace Vulkan -- cgit v1.2.3 From 56d4a9ebde4afa18329ba6df4995ed9ef2aa1ca1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 17:53:27 -0300 Subject: texture_cache: Reduce invalid image/sampler error severity --- src/video_core/texture_cache/texture_cache.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 255b07cf8..f34c9d9ca 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -548,13 +548,13 @@ void TextureCache

::FillComputeImageViews(std::span indices, template typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - [[unlikely]] if (index > graphics_sampler_table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + if (index > graphics_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } const auto [descriptor, is_new] = graphics_sampler_table.Read(index); SamplerId& id = graphics_sampler_ids[index]; - [[unlikely]] if (is_new) { + if (is_new) { id = FindSampler(descriptor); } return &slot_samplers[id]; @@ -562,13 +562,13 @@ typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { template typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - [[unlikely]] if (index > compute_sampler_table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); + if (index > compute_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } const auto [descriptor, is_new] = compute_sampler_table.Read(index); SamplerId& id = compute_sampler_ids[index]; - [[unlikely]] if (is_new) { + if (is_new) { id = FindSampler(descriptor); } return &slot_samplers[id]; @@ -669,7 +669,7 @@ ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, std::span cached_image_view_ids, u32 index) { if (index > table.Limit()) { - LOG_ERROR(HW_GPU, "Invalid image view index={}", index); + LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); return NULL_IMAGE_VIEW_ID; } const auto [descriptor, is_new] = table.Read(index); -- cgit v1.2.3 From 99f2c31b64aa7854690368f4637ef59a546b2d15 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 29 May 2021 02:53:39 -0300 Subject: vulkan_device: Enable float64 and int64 conditionally Add Intel Xe support. --- src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++-- src/video_core/vulkan_common/vulkan_device.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e27a2b51e..aabcb0b10 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -251,8 +251,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderStorageImageArrayDynamicIndexing = false, .shaderClipDistance = true, .shaderCullDistance = true, - .shaderFloat64 = true, - .shaderInt64 = true, + .shaderFloat64 = is_shader_float64_supported, + .shaderInt64 = is_shader_int64_supported, .shaderInt16 = true, .shaderResourceResidency = false, .shaderResourceMinLod = false, @@ -909,6 +909,8 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { void Device::SetupFeatures() { const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_float64_supported = features.shaderFloat64; + is_shader_int64_supported = features.shaderInt64; is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(features); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index ebe073293..693419505 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -314,6 +314,8 @@ private: bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_shader_float64_supported{}; ///< Support for float64. + bool is_shader_int64_supported{}; ///< Support for int64. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. -- cgit v1.2.3 From a7e9756671be5bb99566277709e5becdea774f34 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 02:57:42 -0300 Subject: buffer_cache: Mark uniform buffers as dirty if any enable bit changes --- src/video_core/buffer_cache/buffer_cache.h | 10 +++++----- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 4 +++- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 1 + src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 6 +++++- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 3 +++ 5 files changed, 17 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6c92e4c30..d6b9eb99f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -142,7 +142,7 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(size_t stage, u32 enabled); + void SetEnabledUniformBuffers(const std::array& mask); void SetEnabledComputeUniformBuffers(u32 enabled); @@ -670,13 +670,13 @@ void BufferCache

::BindHostComputeBuffers() { } template -void BufferCache

::SetEnabledUniformBuffers(size_t stage, u32 enabled) { +void BufferCache

::SetEnabledUniformBuffers(const std::array& mask) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers[stage] != enabled) { - dirty_uniform_buffers[stage] = ~u32{0}; + if (enabled_uniform_buffers != mask) { + dirty_uniform_buffers.fill(~u32{0}); } } - enabled_uniform_buffers[stage] = enabled; + enabled_uniform_buffers = mask; } template diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 38ec88b13..976897067 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -100,6 +100,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } + enabled_uniform_buffers[stage] = info.constant_buffer_mask; + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; num_texture_buffers[stage] += num_tex_buffer_bindings; num_textures += num_tex_buffer_bindings; @@ -145,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); + buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); @@ -153,7 +156,6 @@ void GraphicsPipeline::Configure(bool is_indexed) { const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; const auto config_stage{[&](size_t stage) { const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { size_t ssbo_index{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index c1113e180..bf33ce604 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -99,6 +99,7 @@ private: u32 enabled_stages_mask{}; std::array stage_infos{}; + std::array enabled_uniform_buffers{}; std::array base_uniform_bindings{}; std::array base_storage_bindings{}; std::array num_texture_buffers{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e5f54a84f..dfe6e6a80 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -218,6 +218,9 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + std::ranges::transform(infos, enabled_uniform_buffers.begin(), [](const Shader::Info* info) { + return info ? info->constant_buffer_mask : 0; + }); auto func{[this, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; @@ -259,11 +262,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); + buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { size_t ssbo_index{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index e362d13c5..4068a0edc 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -128,7 +128,10 @@ private: std::vector transitions; std::array spv_modules; + std::array stage_infos; + std::array enabled_uniform_buffers{}; + vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; vk::PipelineLayout pipeline_layout; -- cgit v1.2.3 From 916ca7432474e891864524dcbc6c879d5cdbfb72 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 03:40:19 -0300 Subject: opengl: Declare fragment outputs even if they are not used Fixes Ori and the Blind Forest's menu on GLASM. For some reason (probably high level optimizations) it is not sanitized on SPIR-V for OpenGL. Vulkan is unaffected by this change. --- src/shader_recompiler/backend/glasm/emit_context.cpp | 10 +++------- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 3 +-- src/shader_recompiler/backend/spirv/emit_context.cpp | 2 +- src/shader_recompiler/profile.h | 4 ++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 7 +++++++ 6 files changed, 18 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index e18526816..08918a5c2 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -117,13 +117,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile index, index); } } - for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { - if (!info.stores_frag_color[index]) { - continue; - } - if (index == 0) { - Add("OUTPUT frag_color0=result.color;"); - } else { + if (stage == Stage::Fragment) { + Add("OUTPUT frag_color0=result.color;"); + for (size_t index = 1; index < info.stores_frag_color.size(); ++index) { Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index e23208d2c..70ca6f621 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -298,8 +298,7 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { header += "OPTION NV_early_fragment_tests;"; } - const auto non_zero_frag_colors{info.stores_frag_color | std::views::drop(1)}; - if (std::ranges::find(non_zero_frag_colors, true) != non_zero_frag_colors.end()) { + if (stage == Stage::Fragment) { header += "OPTION ARB_draw_buffers;"; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3e8899f53..7c618125e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1320,7 +1320,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { break; case Stage::Fragment: for (u32 index = 0; index < 8; ++index) { - if (!info.stores_frag_color[index]) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { continue; } frag_color[index] = DefineOutput(*this, F32[4], std::nullopt); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f8913bf14..f059e3b26 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -84,7 +84,11 @@ struct Profile { bool support_int64_atomics{}; bool warp_size_potentially_larger_than_guest{}; + bool lower_left_origin_mode{}; + /// Fragment outputs have to be declared even if they are not written to avoid undefined values. + /// See Ori and the Blind Forest's main menu for reference. + bool need_declared_frag_colors{}; /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7d2ec4efa..6ea7c0ee8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -276,7 +276,9 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_int64_atomics = false, .warp_size_potentially_larger_than_guest = true, + .lower_left_origin_mode = true, + .need_declared_frag_colors = true, .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b6998e37c..cec51cc77 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -274,9 +274,16 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), + + .lower_left_origin_mode = false, + .need_declared_frag_colors = false, + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, + .has_broken_signed_operations = false, + .ignore_nan_fp_comparisons = false, }; } -- cgit v1.2.3 From c44b16124fcfb64b9482d639ae55670005eb6307 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:42:42 -0300 Subject: vk_buffer_cache: Add transform feedback usage to buffers --- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 37 +++++++++++++--------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 568993c58..2da3de6de 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -60,6 +60,27 @@ std::array MakeQuadIndices(u32 quad, u32 first) { } return indices; } + +vk::Buffer CreateBuffer(const Device& device, u64 size) { + VkBufferUsageFlags flags = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + if (device.IsExtTransformFeedbackSupported()) { + flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; + } + return device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = size, + .usage = flags, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); +} } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) @@ -68,21 +89,7 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_), - device{&runtime.device}, - buffer{device->GetLogical().CreateBuffer({ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - })}, + device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); -- cgit v1.2.3 From 77372443c3d6b20d7f78366bb4aa162f22bd7cde Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:43:47 -0300 Subject: vulkan: Enable depth bounds and use it conditionally Intel devices pre-Xe don't support this. --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 +++++ src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index dfe6e6a80..d381109d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -598,13 +598,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthCompareOp = dynamic.depth_test_enable ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS, - .depthBoundsTestEnable = dynamic.depth_bounds_enable, + .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(), .stencilTestEnable = dynamic.stencil_enable, .front = GetStencilFaceState(dynamic.front), .back = GetStencilFaceState(dynamic.back), .minDepthBounds = 0.0f, .maxDepthBounds = 0.0f, }; + if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + } static_vector cb_attachments; const size_t num_attachments{NumAttachments(key.state)}; for (size_t index = 0; index < num_attachments; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ef14e91e7..9611b480a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -682,6 +682,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re if (!state_tracker.TouchDepthBoundsTestEnable()) { return; } + bool enabled = regs.depth_bounds_enable; + if (enabled && !device.IsDepthBoundsSupported()) { + LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); + enabled = false; + } scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBoundsTestEnableEXT(enable); }); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index aabcb0b10..0a42efb6a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -226,7 +226,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .depthClamp = true, .depthBiasClamp = true, .fillModeNonSolid = true, - .depthBounds = false, + .depthBounds = is_depth_bounds_supported, .wideLines = false, .largePoints = true, .alphaToOne = false, @@ -908,6 +908,7 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { void Device::SetupFeatures() { const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_depth_bounds_supported = features.depthBounds; is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; is_shader_float64_supported = features.shaderFloat64; is_shader_int64_supported = features.shaderInt64; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 693419505..1ab63ecd7 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -159,6 +159,11 @@ public: return is_formatless_image_load_supported; } + // Returns true if depth bounds is supported. + bool IsDepthBoundsSupported() const { + return is_depth_bounds_supported; + } + /// Returns true when blitting from and to depth stencil images is supported. bool IsBlitDepthStencilSupported() const { return is_blit_depth_stencil_supported; @@ -314,6 +319,7 @@ private: bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. + bool is_depth_bounds_supported{}; ///< Support for depth bounds. bool is_shader_float64_supported{}; ///< Support for float64. bool is_shader_int64_supported{}; ///< Support for int64. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. -- cgit v1.2.3 From 1148a4eac715869077ace56a9a311a167643aca3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 20:44:28 -0300 Subject: vulkan: Conditionally use shaderInt16 Add support for Polaris AMD devices. --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 3 ++- src/video_core/vulkan_common/vulkan_device.h | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index cec51cc77..2a2f166c8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -249,7 +249,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .unified_descriptor_binding = true, .support_descriptor_aliasing = true, .support_int8 = true, - .support_int16 = true, + .support_int16 = device.IsShaderInt16Supported(), .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0a42efb6a..2b715baba 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -253,7 +253,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderCullDistance = true, .shaderFloat64 = is_shader_float64_supported, .shaderInt64 = is_shader_int64_supported, - .shaderInt16 = true, + .shaderInt16 = is_shader_int16_supported, .shaderResourceResidency = false, .shaderResourceMinLod = false, .sparseBinding = false, @@ -912,6 +912,7 @@ void Device::SetupFeatures() { is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; is_shader_float64_supported = features.shaderFloat64; is_shader_int64_supported = features.shaderInt64; + is_shader_int16_supported = features.shaderInt16; is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); is_optimal_astc_supported = IsOptimalAstcSupported(features); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 1ab63ecd7..9bc1fb947 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -159,6 +159,11 @@ public: return is_formatless_image_load_supported; } + /// Returns true if shader int16 is supported. + bool IsShaderInt16Supported() const { + return is_shader_int16_supported; + } + // Returns true if depth bounds is supported. bool IsDepthBoundsSupported() const { return is_depth_bounds_supported; @@ -322,6 +327,7 @@ private: bool is_depth_bounds_supported{}; ///< Support for depth bounds. bool is_shader_float64_supported{}; ///< Support for float64. bool is_shader_int64_supported{}; ///< Support for int64. + bool is_shader_int16_supported{}; ///< Support for int16. bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images. bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. -- cgit v1.2.3 From d26271b0148e4c41d87199b3e42a5702d1a6be53 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 1 Jun 2021 19:59:29 -0300 Subject: vk_swapchain: Avoid recreating the swapchain on each frame Recreate only when requested (or sRGB is changed) instead of tracking the frontend's size. That size is still used as a hint. --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 18 ++++++------------ src/video_core/renderer_vulkan/vk_swapchain.h | 6 +++--- 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index d50647ba7..54c41bcaf 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,19 +97,14 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), - telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), - gpu(gpu_), - library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), - memory_allocator(device, false), - state_tracker(gpu), - scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), + state_tracker(gpu), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, @@ -139,17 +134,16 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); const bool is_srgb = use_accelerated && screen_info.is_srgb; - const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); bool has_been_recreated = false; const auto recreate_swapchain = [&] { if (!has_been_recreated) { has_been_recreated = true; scheduler.WaitWorker(); } + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); swapchain.Create(layout.width, layout.height, is_srgb); }; - if (swapchain.NeedsRecreate() || - swapchain.HasDifferentLayout(layout.width, layout.height, is_srgb)) { + if (swapchain.NeedsRecreate() || swapchain.HasColorSpaceChanged(is_srgb)) { recreate_swapchain(); } bool needs_recreate; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index b38fd9dc2..df6da3d93 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -33,9 +33,9 @@ public: /// Presents the rendered image to the swapchain. void Present(VkSemaphore render_semaphore); - /// Returns true when the framebuffer layout has changed. - bool HasDifferentLayout(u32 width, u32 height, bool is_srgb) const { - return extent.width != width || extent.height != height || current_srgb != is_srgb; + /// Returns true when the color space has changed. + bool HasColorSpaceChanged(bool is_srgb) const { + return current_srgb != is_srgb; } /// Returns true when the image has to be recreated. -- cgit v1.2.3 From 46bd362d0dfab27e8c8d49f11eb4e3f373bf8f23 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 1 Jun 2021 20:37:45 -0300 Subject: fixed_pipeline_state: Use regular for loop instead of ranges for perf MSVC generates better code for it. --- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 3a43c329f..1486d088a 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -84,9 +84,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); depth_format.Assign(static_cast(regs.zeta.format)); - std::ranges::transform(regs.rt, color_formats.begin(), - [](const auto& rt) { return static_cast(rt.format); }); + for (size_t i = 0; i < regs.rt.size(); ++i) { + color_formats[i] = static_cast(regs.rt[i].format); + } alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); -- cgit v1.2.3 From e57ee3b7fd8dd942b616d389635a4e9f00c596e9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 01:10:04 -0300 Subject: transform_feedback: Read buffer stride from index instead of layout --- src/video_core/transform_feedback.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp index db52fff93..ba26ac3f1 100644 --- a/src/video_core/transform_feedback.cpp +++ b/src/video_core/transform_feedback.cpp @@ -72,8 +72,9 @@ std::vector MakeTransformFeedbackVaryings( const u32 base_offset = offset; const u8 location = locations[offset]; + UNIMPLEMENTED_IF_MSG(layout.stream != 0, "Stream is not zero: {}", layout.stream); Shader::TransformFeedbackVarying varying{ - .buffer = layout.stream, + .buffer = static_cast(buffer), .stride = layout.stride, .offset = offset * 4, .components = 1, -- cgit v1.2.3 From 4a2361a1e2271727f3259e8e4a60869165537253 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 02:15:07 -0300 Subject: buffer_cache: Reduce uniform buffer size from shader usage Increases performance significantly on certain titles. --- .../ir_opt/collect_shader_info_pass.cpp | 19 ++++++++-- src/shader_recompiler/shader_info.h | 1 + src/video_core/buffer_cache/buffer_cache.h | 42 +++++++++++++--------- .../renderer_opengl/gl_compute_pipeline.cpp | 4 ++- .../renderer_opengl/gl_compute_pipeline.h | 1 + .../renderer_opengl/gl_graphics_pipeline.cpp | 21 +++++------ .../renderer_opengl/gl_graphics_pipeline.h | 3 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 6 +++- .../renderer_vulkan/vk_compute_pipeline.h | 2 ++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 14 +++++--- .../renderer_vulkan/vk_graphics_pipeline.h | 3 +- 11 files changed, 78 insertions(+), 38 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6a5243c9f..fb2031fc8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -560,32 +560,45 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GetCbufU32: case IR::Opcode::GetCbufF32: case IR::Opcode::GetCbufU32x2: { - if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { - AddConstantBufferDescriptor(info, index.U32(), 1); - } else { + const IR::Value index{inst.Arg(0)}; + const IR::Value offset{inst.Arg(1)}; + if (!index.IsImmediate()) { throw NotImplementedException("Constant buffer with non-immediate index"); } + AddConstantBufferDescriptor(info, index.U32(), 1); + u32 element_size{}; switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: info.used_constant_buffer_types |= IR::Type::U8; + element_size = 1; break; case IR::Opcode::GetCbufU16: case IR::Opcode::GetCbufS16: info.used_constant_buffer_types |= IR::Type::U16; + element_size = 2; break; case IR::Opcode::GetCbufU32: info.used_constant_buffer_types |= IR::Type::U32; + element_size = 4; break; case IR::Opcode::GetCbufF32: info.used_constant_buffer_types |= IR::Type::F32; + element_size = 4; break; case IR::Opcode::GetCbufU32x2: info.used_constant_buffer_types |= IR::Type::U32x2; + element_size = 8; break; default: break; } + u32& size{info.constant_buffer_used_sizes[index.U32()]}; + if (offset.IsImmediate()) { + size = std::max(size, offset.U32() + element_size); + } else { + size = 0x10'000; + } break; } case IR::Opcode::BindlessImageSampleImplicitLod: diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d5b2ca7bc..32f8a50ea 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -197,6 +197,7 @@ struct Info { IR::Type used_storage_buffer_types{}; u32 constant_buffer_mask{}; + std::array constant_buffer_used_sizes{}; u32 nvn_buffer_base{}; std::bitset<16> nvn_buffer_used{}; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d6b9eb99f..ec64f2293 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -44,6 +44,7 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; using VideoCore::Surface::PixelFormat; +using namespace Common::Literals; constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; @@ -53,7 +54,8 @@ constexpr u32 NUM_STORAGE_BUFFERS = 16; constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; -using namespace Common::Literals; +using UniformBufferSizes = std::array, NUM_STAGES>; +using ComputeUniformBufferSizes = std::array; template class BufferCache { @@ -142,9 +144,10 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(const std::array& mask); + void SetUniformBuffersState(const std::array& mask, + const UniformBufferSizes* sizes); - void SetEnabledComputeUniformBuffers(u32 enabled); + void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); void UnbindGraphicsStorageBuffers(size_t stage); @@ -384,8 +387,11 @@ private: std::array compute_storage_buffers; std::array compute_texture_buffers; - std::array enabled_uniform_buffers{}; - u32 enabled_compute_uniform_buffers = 0; + std::array enabled_uniform_buffer_masks{}; + u32 enabled_compute_uniform_buffer_mask = 0; + + const UniformBufferSizes* uniform_buffer_sizes{}; + const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; std::array enabled_storage_buffers{}; std::array written_storage_buffers{}; @@ -670,18 +676,22 @@ void BufferCache

::BindHostComputeBuffers() { } template -void BufferCache

::SetEnabledUniformBuffers(const std::array& mask) { +void BufferCache

::SetUniformBuffersState(const std::array& mask, + const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers != mask) { + if (enabled_uniform_buffer_masks != mask) { dirty_uniform_buffers.fill(~u32{0}); } } - enabled_uniform_buffers = mask; + enabled_uniform_buffer_masks = mask; + uniform_buffer_sizes = sizes; } template -void BufferCache

::SetEnabledComputeUniformBuffers(u32 enabled) { - enabled_compute_uniform_buffers = enabled; +void BufferCache

::SetComputeUniformBufferState(u32 mask, + const ComputeUniformBufferSizes* sizes) { + enabled_compute_uniform_buffer_mask = mask; + compute_uniform_buffer_sizes = sizes; } template @@ -984,7 +994,7 @@ void BufferCache

::BindHostGraphicsUniformBuffers(size_t stage) { dirty = std::exchange(dirty_uniform_buffers[stage], 0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { const bool needs_bind = ((dirty >> index) & 1) != 0; BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { @@ -998,7 +1008,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 bool needs_bind) { const Binding& binding = uniform_buffers[stage][index]; const VAddr cpu_addr = binding.cpu_addr; - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && @@ -1113,11 +1123,11 @@ void BufferCache

::BindHostComputeUniformBuffers() { dirty_uniform_buffers.fill(~u32{0}); } u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); @@ -1261,7 +1271,7 @@ void BufferCache

::UpdateVertexBuffer(u32 index) { template void BufferCache

::UpdateUniformBuffers(size_t stage) { - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { Binding& binding = uniform_buffers[stage][index]; if (binding.buffer_id) { // Already updated @@ -1334,7 +1344,7 @@ void BufferCache

::UpdateTransformFeedbackBuffer(u32 index) { template void BufferCache

::UpdateComputeUniformBuffers() { - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { Binding& binding = compute_uniform_buffers[index]; binding = NULL_BINDING; const auto& launch_desc = kepler_compute.launch_description; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 5cf5f97a9..61b6fe4b7 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -43,6 +43,8 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); num_image_buffers = AccumulateCount(info.image_buffer_descriptors); @@ -63,7 +65,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac } void ComputePipeline::Configure() { - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index dd6b62ef2..b5dfb65e9 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -72,6 +72,7 @@ private: Shader::Info info; OGLProgram source_program; OGLAssemblyProgram assembly_program; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; u32 num_texture_buffers{}; u32 num_image_buffers{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 976897067..a5d65fdca 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -60,6 +60,14 @@ std::pair TransformFeedbackEnum(u8 location) { UNIMPLEMENTED_MSG("index={}", index); return {GL_POSITION, 0}; } + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; } // Anonymous namespace size_t GraphicsPipelineKey::Hash() const noexcept { @@ -100,7 +108,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } - enabled_uniform_buffers[stage] = info.constant_buffer_mask; + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; num_texture_buffers[stage] += num_tex_buffer_bindings; @@ -130,14 +139,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } } -struct Spec { - static constexpr std::array enabled_stages{true, true, true, true, true}; - static constexpr bool has_storage_buffers = true; - static constexpr bool has_texture_buffers = true; - static constexpr bool has_image_buffers = true; - static constexpr bool has_images = true; -}; - void GraphicsPipeline::Configure(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; @@ -147,7 +148,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); - buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index bf33ce604..508fad5bb 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -99,7 +99,8 @@ private: u32 enabled_stages_mask{}; std::array stage_infos{}; - std::array enabled_uniform_buffers{}; + std::array enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; std::array base_uniform_bindings{}; std::array base_storage_bindings{}; std::array num_texture_buffers{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 168ffa7e9..ca59042ff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include @@ -27,6 +28,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript vk::ShaderModule spv_module_) : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); + auto func{[this, &descriptor_pool] { DescriptorLayoutBuilder builder{device.GetLogical()}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -75,7 +79,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, BufferCache& buffer_cache, TextureCache& texture_cache) { update_descriptor_queue.Acquire(); - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index a560e382e..a6043866d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -44,6 +44,8 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; + vk::ShaderModule spv_module; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d381109d6..627ca0158 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -218,10 +218,14 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - std::ranges::transform(infos, enabled_uniform_buffers.begin(), [](const Shader::Info* info) { - return info ? info->constant_buffer_mask : 0; - }); - + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + const Shader::Info* const info{infos[stage]}; + if (!info) { + continue; + } + enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; + std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + } auto func{[this, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); @@ -262,7 +266,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); - buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4068a0edc..8c81c28a8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -130,7 +130,8 @@ private: std::array spv_modules; std::array stage_infos; - std::array enabled_uniform_buffers{}; + std::array enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; -- cgit v1.2.3 From 79f2fe1a39120f498e915fa0c740b15dc0f09793 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 03:02:33 -0300 Subject: glasm: Use ARB_derivative_control conditionally --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 7 +++--- .../backend/glasm/emit_glasm_warp.cpp | 29 +++++++++++++++++++--- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 ++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 6 files changed, 37 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 70ca6f621..fc01797b6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -265,9 +265,7 @@ void SetupOptions(const IR::Program& program, const Profile& profile, // TODO: Track the shared atomic ops header += "OPTION NV_internal;" "OPTION NV_shader_storage_buffer;" - "OPTION NV_gpu_program_fp64;" - "OPTION NV_bindless_texture;" - "OPTION ARB_derivative_control;"; + "OPTION NV_gpu_program_fp64;"; if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; } @@ -295,6 +293,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { header += "OPTION EXT_shader_image_load_formatted;"; } + if (profile.support_derivative_control) { + header += "OPTION ARB_derivative_control;"; + } if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { header += "OPTION NV_early_fragment_tests;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index 6e30790bb..8cec5ee7e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -5,6 +5,7 @@ #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLASM { @@ -111,19 +112,39 @@ void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDX.FINE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.FINE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDX {}.x,{};", inst, p); + } } void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDY.FINE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.FINE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDY {}.x,{};", inst, p); + } } void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDX.COARSE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.COARSE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDX {}.x,{};", inst, p); + } } void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDY.COARSE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.COARSE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDY {}.x,{};", inst, p); + } } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f059e3b26..3109fb69c 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -82,6 +82,7 @@ struct Profile { bool support_typeless_image_loads{}; bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; + bool support_derivative_control{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 01da2bb57..3f7929f9e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -154,6 +154,7 @@ Device::Device() { has_precise_bug = TestPreciseBug(); has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + has_derivative_control = GLAD_GL_ARB_derivative_control; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index d67f5693c..1ffd24883 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -96,6 +96,10 @@ public: return has_nv_viewport_array2; } + bool HasDerivativeControl() const { + return has_derivative_control; + } + bool HasDebuggingToolAttached() const { return has_debugging_tool_attached; } @@ -141,6 +145,7 @@ private: bool has_broken_texture_view_formats{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; + bool has_derivative_control{}; bool has_debugging_tool_attached{}; bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6ea7c0ee8..bdffac4b2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -274,6 +274,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_typeless_image_loads = device.HasImageLoadFormatted(), .support_demote_to_helper_invocation = false, .support_int64_atomics = false, + .support_derivative_control = device.HasDerivativeControl(), .warp_size_potentially_larger_than_guest = true, -- cgit v1.2.3 From 8c954fcaee77c70583c5edc73c7c35eefcca39b0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 04:28:30 -0300 Subject: vk_pipeline_cache: Set support_derivative_control to true --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2a2f166c8..a7f3619a5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -274,6 +274,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .support_derivative_control = true, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), -- cgit v1.2.3 From 8f099af6a8ea691c5f8f1403848ca42077b34bd6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 04:28:59 -0300 Subject: nsight_aftermath_tracker: Fix SPIR-V module writes --- src/video_core/vulkan_common/nsight_aftermath_tracker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp index 209cb1e0a..fdd1a5081 100644 --- a/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp +++ b/src/video_core/vulkan_common/nsight_aftermath_tracker.cpp @@ -99,7 +99,7 @@ void NsightAftermathTracker::SaveShader(std::span spirv) const { LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); return; } - if (file.Write(spirv) != spirv.size()) { + if (file.WriteSpan(spirv) != spirv.size()) { LOG_ERROR(Render_Vulkan, "Failed to write SPIR-V module with hash={:016x}", hash.hash); return; } -- cgit v1.2.3 From b02c78b276f449318bdc787a35d123df01c0bc6d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 18:50:01 -0300 Subject: vk_buffer_cache: Handle null texture buffers Fixes a crash on Age of Calamity cutscenes. --- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 2da3de6de..f4b3ee95c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -97,6 +97,10 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast } VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { + if (!device) { + // Null buffer, return a null descriptor + return VK_NULL_HANDLE; + } const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { return offset == view.offset && size == view.size && format == view.format; })}; -- cgit v1.2.3 From 562af301819227d65a251a2c29c997bf798da7ba Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 4 Jun 2021 00:11:16 +0200 Subject: shader: Fix VertexA Shaders. --- src/shader_recompiler/frontend/maxwell/program.cpp | 19 +++++++++++----- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 24 +++++++++++++------- src/shader_recompiler/ir_opt/passes.h | 1 - src/video_core/renderer_opengl/gl_shader_cache.cpp | 26 +++++++++++++++++----- 4 files changed, 51 insertions(+), 19 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 900fc7ab1..8489f9a5f 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -171,20 +171,29 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b IR::Program result{}; Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); - std::swap(result.blocks, vertex_a.blocks); - result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end()); + for (const auto& term : vertex_a.syntax_list) { + if (term.type == IR::AbstractSyntaxNode::Type::Return) { + continue; + } + result.syntax_list.push_back(term); + } + for (const auto& term : vertex_b.syntax_list) { + result.syntax_list.push_back(term); + } + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; } Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); - Optimization::DualVertexJoinPass(result); - result.post_order_blocks = PostOrder(result.syntax_list.front()); Optimization::DeadCodeEliminationPass(result); Optimization::VerificationPass(result); Optimization::CollectShaderInfoPass(env_vertex_b, result); diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index b0a9f5258..a926123f2 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -13,16 +13,24 @@ namespace Shader::Optimization { -void VertexATransformPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); +void VertexATransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return inst.Invalidate(); + } + } + } } -void VertexBTransformPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); -} - -void DualVertexJoinPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); +void VertexBTransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Prologue) { + return inst.Invalidate(); + } + } + } } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index e9cb8546a..5ebde49ea 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -25,7 +25,6 @@ void VerificationPass(const IR::Program& program); // Dual Vertex void VertexATransformPass(IR::Program& program); void VertexBTransformPass(IR::Program& program); -void DualVertexJoinPass(IR::Program& program); void JoinTextureInfo(Info& base, Info& source); void JoinStorageInfo(Info& base, Info& source); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index bdffac4b2..0e4904733 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -40,6 +40,7 @@ namespace OpenGL { namespace { using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; @@ -446,6 +447,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( size_t env_index{}; u32 total_storage_buffers{}; std::array programs; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -454,11 +457,22 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( ++env_index; const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; - Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + // Normal path + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); - for (const auto& desc : programs[index].info.storage_buffers_descriptors) { - total_storage_buffers += desc.count; + for (const auto& desc : programs[index].info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } + } else { + // VertexB path when VertexA is present. + Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + for (const auto& desc : program_vb.info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; @@ -472,7 +486,9 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( if (!device.UseAssemblyShaders()) { source_program.handle = glCreateProgram(); } - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { if (key.unique_hashes[index] == 0) { continue; } -- cgit v1.2.3 From f45f7b5c2a869123340591cec6db58c33a5fd3ab Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 3 Jun 2021 17:42:24 -0300 Subject: vk_swapchain: Handle outdated swapchains Fixes pixelated presentation on Intel devices. --- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 11 +++++---- src/video_core/renderer_vulkan/vk_swapchain.cpp | 26 ++++++++++++++++------ src/video_core/renderer_vulkan/vk_swapchain.h | 14 ++++++++---- 3 files changed, 34 insertions(+), 17 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 54c41bcaf..6fda06a7e 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -143,18 +143,17 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); swapchain.Create(layout.width, layout.height, is_srgb); }; - if (swapchain.NeedsRecreate() || swapchain.HasColorSpaceChanged(is_srgb)) { + if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) { recreate_swapchain(); } - bool needs_recreate; + bool is_outdated; do { - needs_recreate = false; swapchain.AcquireNextImage(); - if (swapchain.NeedsRecreate()) { + is_outdated = swapchain.IsOutDated(); + if (is_outdated) { recreate_swapchain(); - needs_recreate = true; } - } while (needs_recreate); + } while (is_outdated); if (has_been_recreated) { blit_screen.Recreate(); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index a71b0b01e..d990eefba 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -65,7 +65,8 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul VKSwapchain::~VKSwapchain() = default; void VKSwapchain::Create(u32 width, u32 height, bool srgb) { - needs_recreate = false; + is_outdated = false; + is_suboptimal = false; const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; @@ -85,11 +86,22 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { } void VKSwapchain::AcquireNextImage() { - const VkResult result = - device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits::max(), - *present_semaphores[frame_index], {}, &image_index); - needs_recreate |= result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR; - + const VkResult result = device.GetLogical().AcquireNextImageKHR( + *swapchain, std::numeric_limits::max(), *present_semaphores[frame_index], + VK_NULL_HANDLE, &image_index); + switch (result) { + case VK_SUCCESS: + break; + case VK_SUBOPTIMAL_KHR: + is_suboptimal = true; + break; + case VK_ERROR_OUT_OF_DATE_KHR: + is_outdated = true; + break; + default: + LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); + break; + } scheduler.Wait(resource_ticks[image_index]); resource_ticks[image_index] = scheduler.CurrentTick(); } @@ -115,7 +127,7 @@ void VKSwapchain::Present(VkSemaphore render_semaphore) { LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); break; case VK_ERROR_OUT_OF_DATE_KHR: - needs_recreate = true; + is_outdated = true; break; default: LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index df6da3d93..35c2cdc14 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -38,9 +38,14 @@ public: return current_srgb != is_srgb; } - /// Returns true when the image has to be recreated. - bool NeedsRecreate() const { - return needs_recreate; + /// Returns true when the swapchain is outdated. + bool IsOutDated() const { + return is_outdated; + } + + /// Returns true when the swapchain is suboptimal. + bool IsSubOptimal() const { + return is_suboptimal; } VkExtent2D GetSize() const { @@ -95,7 +100,8 @@ private: VkExtent2D extent{}; bool current_srgb{}; - bool needs_recreate{}; + bool is_outdated{}; + bool is_suboptimal{}; }; } // namespace Vulkan -- cgit v1.2.3 From c736b9ffabc8a869d8ed131d365aff21b049f751 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 4 Jun 2021 00:52:40 +0200 Subject: DMA: Restrict optimised path for BlockToLinear further. --- src/video_core/engines/maxwell_dma.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c51776466..c7ec1eac9 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -127,7 +127,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { // Optimized path for micro copies. const size_t dst_size = static_cast(regs.pitch_out) * regs.line_count; - if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X) { + if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && + regs.src_params.height > GOB_SIZE_Y) { FastCopyBlockLinearToPitch(); return; } -- cgit v1.2.3 From 2a0aeaa3d283f1c7f003c956ab3079f70246b008 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 5 Jun 2021 21:48:38 -0300 Subject: vk_rasterizer: Flush work on clear and dispatches --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9611b480a..e72f8426b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -189,6 +189,7 @@ void RasterizerVulkan::Clear() { if (!maxwell3d.ShouldExecute()) { return; } + FlushWork(); query_cache.UpdateCounters(); @@ -259,6 +260,8 @@ void RasterizerVulkan::Clear() { } void RasterizerVulkan::DispatchCompute() { + FlushWork(); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; if (!pipeline) { return; -- cgit v1.2.3 From 48aad8dc05f027c21aa0e8a68d827006d9f7a196 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 5 Jun 2021 22:10:02 -0300 Subject: vk_pipeline_cache: Add asynchronous shaders --- .../renderer_vulkan/vk_graphics_pipeline.h | 6 ++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 26 ++++++++++++++++++++-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 ++++ 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 8c81c28a8..3f8895927 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -87,7 +87,7 @@ public: configure_func(this, is_indexed); } - GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { if (key == current_key) { return this; } @@ -96,6 +96,10 @@ public: : nullptr; } + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); + } + template static auto MakeConfigureSpecFunc() { return [](GraphicsPipeline* pipeline, bool is_indexed) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a7f3619a5..741ed1a98 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -240,6 +240,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; @@ -303,7 +304,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; if (next) { current_pipeline = next; - return current_pipeline; + return BuiltPipeline(current_pipeline); } } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; @@ -318,7 +319,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { current_pipeline->AddTransition(pipeline.get()); } current_pipeline = pipeline.get(); - return current_pipeline; + return BuiltPipeline(current_pipeline); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -415,6 +416,27 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.WaitForRequests(); } +GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; + } + if (!use_asynchronous_shaders) { + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { + return nullptr; + } + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; + } + return nullptr; +} + std::unique_ptr PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, std::span envs, bool build_in_parallel) try { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 4116cc73f..869c63baf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -115,6 +115,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; + std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -140,6 +142,8 @@ private: GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; + bool use_asynchronous_shaders{}; + std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3 From cffd4716c5ebf9b93505b5bfa96d9b407f349336 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 00:11:36 -0300 Subject: vk_pipeline_cache,shader_notify: Add shader notifications --- .../renderer_vulkan/vk_compute_pipeline.cpp | 12 ++++- .../renderer_vulkan/vk_compute_pipeline.h | 7 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 30 +++++++------ .../renderer_vulkan/vk_graphics_pipeline.h | 22 +++++----- .../renderer_vulkan/vk_pipeline_cache.cpp | 50 ++++++++++++--------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 9 +++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/shader_notify.cpp | 51 ++++++++++------------ src/video_core/shader_notify.h | 28 +++++++----- src/yuzu/main.cpp | 12 ++--- 10 files changed, 127 insertions(+), 96 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ca59042ff..cc855a62e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -14,6 +14,7 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -24,14 +25,18 @@ using Tegra::Texture::TexturePair; ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, - Common::ThreadWorker* thread_worker, const Shader::Info& info_, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, vk::ShaderModule spv_module_) : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); - auto func{[this, &descriptor_pool] { + auto func{[this, &descriptor_pool, shader_notify] { DescriptorLayoutBuilder builder{device.GetLogical()}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -66,6 +71,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript std::lock_guard lock{build_mutex}; is_built = true; build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index a6043866d..52fec04d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -18,6 +18,10 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { class Device; @@ -27,7 +31,8 @@ class ComputePipeline { public: explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, - Common::ThreadWorker* thread_worker, const Shader::Info& info, + Common::ThreadWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, const Shader::Info& info, vk::ShaderModule spv_module); ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 627ca0158..5c916c869 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -17,6 +17,7 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" +#include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #if defined(_MSC_VER) && defined(NDEBUG) @@ -203,30 +204,30 @@ ConfigureFuncPtr ConfigureFunc(const std::array& m } } // Anonymous namespace -GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, VKScheduler& scheduler_, - BufferCache& buffer_cache_, TextureCache& texture_cache_, - const Device& device_, DescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue_, - Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, - const GraphicsPipelineCacheKey& key_, - std::array stages, - const std::array& infos) +GraphicsPipeline::GraphicsPipeline( + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, + VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, + RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, + std::array stages, + const std::array& infos) : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { - std::ranges::transform(infos, stage_infos.begin(), - [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } for (size_t stage = 0; stage < NUM_STAGES; ++stage) { const Shader::Info* const info{infos[stage]}; if (!info) { continue; } + stage_infos[stage] = *info; enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); } - auto func{[this, &render_pass_cache, &descriptor_pool] { + auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); @@ -242,6 +243,9 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, std::lock_guard lock{build_mutex}; is_built = true; build_condvar.notify_one(); + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } }}; if (worker_thread) { worker_thread->QueueWork(std::move(func)); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3f8895927..40d1edabd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -20,6 +20,10 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { struct GraphicsPipelineCacheKey { @@ -64,16 +68,14 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, - Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, - BufferCache& buffer_cache, TextureCache& texture_cache, - const Device& device, DescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, - const GraphicsPipelineCacheKey& key, - std::array stages, - const std::array& infos); + explicit GraphicsPipeline( + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, + VideoCore::ShaderNotify* shader_notify, const Device& device, + DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array stages, + const std::array& infos); GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 741ed1a98..e61d76490 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -235,11 +235,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, - TextureCache& texture_cache_) + TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { @@ -307,19 +307,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { return BuiltPipeline(current_pipeline); } } - const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; - auto& pipeline{pair->second}; - if (is_new) { - pipeline = CreateGraphicsPipeline(); - } - if (!pipeline) { - return nullptr; - } - if (current_pipeline) { - current_pipeline->AddTransition(pipeline.get()); - } - current_pipeline = pipeline.get(); - return BuiltPipeline(current_pipeline); + return CurrentGraphicsPipelineSlowPath(); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -416,6 +404,22 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.WaitForRequests(); } +GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() { + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& pipeline{pair->second}; + if (is_new) { + pipeline = CreateGraphicsPipeline(); + } + if (!pipeline) { + return nullptr; + } + if (current_pipeline) { + current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return BuiltPipeline(current_pipeline); +} + GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { if (pipeline->IsBuilt()) { return pipeline; @@ -484,14 +488,16 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { - const std::string name{fmt::format("{:016x}", key.unique_hashes[index])}; + const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - return std::make_unique( - maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, - update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; + return std::make_unique(maxwell3d, gpu_memory, scheduler, buffer_cache, + texture_cache, notify, device, descriptor_pool, + update_descriptor_queue, thread_worker, + render_pass_cache, key, std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -550,12 +556,14 @@ std::unique_ptr PipelineCache::CreateComputePipeline( device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { - const auto name{fmt::format("{:016x}", key.unique_hash)}; + const auto name{fmt::format("Shader {:016x}", key.unique_hash)}; spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, - thread_worker, program.info, std::move(spv_module)); + thread_worker, notify, program.info, + std::move(spv_module)); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 869c63baf..167a2ee2e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -38,6 +38,10 @@ namespace Shader::IR { struct Program; } +namespace VideoCore { +class ShaderNotify; +} + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -104,7 +108,7 @@ public: VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, - TextureCache& texture_cache); + TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); ~PipelineCache(); [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); @@ -115,6 +119,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; std::unique_ptr CreateGraphicsPipeline(); @@ -138,6 +144,7 @@ private: RenderPassCache& render_pass_cache; BufferCache& buffer_cache; TextureCache& texture_cache; + VideoCore::ShaderNotify& shader_notify; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e72f8426b..d284b3653 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -140,7 +140,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, - texture_cache), + texture_cache, gpu.ShaderNotify()), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index 693e47158..dc6995b46 100644 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp @@ -2,42 +2,35 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include +#include +#include + #include "video_core/shader_notify.h" using namespace std::chrono_literals; namespace VideoCore { -namespace { -constexpr auto UPDATE_TICK = 32ms; -} - -ShaderNotify::ShaderNotify() = default; -ShaderNotify::~ShaderNotify() = default; -std::size_t ShaderNotify::GetShadersBuilding() { - const auto now = std::chrono::high_resolution_clock::now(); - const auto diff = now - last_update; - if (diff > UPDATE_TICK) { - std::shared_lock lock(mutex); - last_updated_count = accurate_count; +const auto TIME_TO_STOP_REPORTING = 2s; + +int ShaderNotify::ShadersBuilding() noexcept { + const int now_complete = num_complete.load(std::memory_order::relaxed); + const int now_building = num_building.load(std::memory_order::relaxed); + if (now_complete == now_building) { + const auto now = std::chrono::high_resolution_clock::now(); + if (completed && num_complete == num_when_completed) { + if (now - complete_time > TIME_TO_STOP_REPORTING) { + report_base = now_complete; + completed = false; + } + } else { + completed = true; + num_when_completed = num_complete; + complete_time = now; + } } - return last_updated_count; -} - -std::size_t ShaderNotify::GetShadersBuildingAccurate() { - std::shared_lock lock{mutex}; - return accurate_count; -} - -void ShaderNotify::MarkShaderComplete() { - std::unique_lock lock{mutex}; - accurate_count--; -} - -void ShaderNotify::MarkSharderBuilding() { - std::unique_lock lock{mutex}; - accurate_count++; + return now_building - report_base; } } // namespace VideoCore diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h index a9c92d179..ad363bfb5 100644 --- a/src/video_core/shader_notify.h +++ b/src/video_core/shader_notify.h @@ -4,26 +4,30 @@ #pragma once +#include #include -#include -#include "common/common_types.h" +#include namespace VideoCore { class ShaderNotify { public: - ShaderNotify(); - ~ShaderNotify(); + [[nodiscard]] int ShadersBuilding() noexcept; - std::size_t GetShadersBuilding(); - std::size_t GetShadersBuildingAccurate(); + void MarkShaderComplete() noexcept { + ++num_complete; + } - void MarkShaderComplete(); - void MarkSharderBuilding(); + void MarkShaderBuilding() noexcept { + ++num_building; + } private: - std::size_t last_updated_count{}; - std::size_t accurate_count{}; - std::shared_mutex mutex; - std::chrono::high_resolution_clock::time_point last_update{}; + std::atomic_int num_building{}; + std::atomic_int num_complete{}; + int report_base{}; + + bool completed{}; + int num_when_completed{}; + std::chrono::high_resolution_clock::time_point complete_time; }; } // namespace VideoCore diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 03a909d17..7e0b1adc4 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -2900,13 +2900,13 @@ void GMainWindow::UpdateStatusBar() { return; } - auto results = Core::System::GetInstance().GetAndResetPerfStats(); - auto& shader_notify = Core::System::GetInstance().GPU().ShaderNotify(); - const auto shaders_building = shader_notify.GetShadersBuilding(); + auto& system = Core::System::GetInstance(); + auto results = system.GetAndResetPerfStats(); + auto& shader_notify = system.GPU().ShaderNotify(); + const int shaders_building = shader_notify.ShadersBuilding(); - if (shaders_building != 0) { - shader_building_label->setText( - tr("Building: %n shader(s)", "", static_cast(shaders_building))); + if (shaders_building > 0) { + shader_building_label->setText(tr("Building: %n shader(s)", "", shaders_building)); shader_building_label->setVisible(true); } else { shader_building_label->setVisible(false); -- cgit v1.2.3 From 12fe7210d2b546bd9c5825b6517b80efc818a7fe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 00:35:57 -0300 Subject: gl_shader_cache: Store workers in shader cache object --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 129 ++++++++++++--------- src/video_core/renderer_opengl/gl_shader_cache.h | 7 ++ 2 files changed, 78 insertions(+), 58 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0e4904733..9d6cef6e8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -239,6 +239,15 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs } } // Anonymous namespace +struct ShaderCache::Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; +}; + ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, @@ -247,46 +256,49 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo ProgramManager& program_manager_, StateTracker& state_tracker_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, - buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ - state_tracker_} { - profile = Shader::Profile{ - .supported_spirv = 0x00010000, - - .unified_descriptor_binding = false, - .support_descriptor_aliasing = false, - .support_int8 = false, - .support_int16 = false, - .support_vertex_instance_id = true, - .support_float_controls = false, - .support_separate_denorm_behavior = false, - .support_separate_rounding_mode = false, - .support_fp16_denorm_preserve = false, - .support_fp32_denorm_preserve = false, - .support_fp16_denorm_flush = false, - .support_fp32_denorm_flush = false, - .support_fp16_signed_zero_nan_preserve = false, - .support_fp32_signed_zero_nan_preserve = false, - .support_fp64_signed_zero_nan_preserve = false, - .support_explicit_workgroup_layout = false, - .support_vote = true, - .support_viewport_index_layer_non_geometry = - device.HasNvViewportArray2() || device.HasVertexViewportLayer(), - .support_viewport_mask = device.HasNvViewportArray2(), - .support_typeless_image_loads = device.HasImageLoadFormatted(), - .support_demote_to_helper_invocation = false, - .support_int64_atomics = false, - .support_derivative_control = device.HasDerivativeControl(), - - .warp_size_potentially_larger_than_guest = true, - - .lower_left_origin_mode = true, - .need_declared_frag_colors = true, - - .has_broken_spirv_clamp = true, - .has_broken_unsigned_image_offsets = true, - .has_broken_signed_operations = true, - .ignore_nan_fp_comparisons = true, - }; + buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, + use_asynchronous_shaders{device.UseAsynchronousShaders()}, + profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.HasNvViewportArray2() || device.HasVertexViewportLayer(), + .support_viewport_mask = device.HasNvViewportArray2(), + .support_typeless_image_loads = device.HasImageLoadFormatted(), + .support_demote_to_helper_invocation = false, + .support_int64_atomics = false, + .support_derivative_control = device.HasDerivativeControl(), + + .warp_size_potentially_larger_than_guest = true, + + .lower_left_origin_mode = true, + .need_declared_frag_colors = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + } { + if (use_asynchronous_shaders) { + workers = CreateWorkers(); + } } ShaderCache::~ShaderCache() = default; @@ -307,29 +319,20 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, } shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); - struct Context { - explicit Context(Core::Frontend::EmuWindow& emu_window) - : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} - - std::unique_ptr gl_context; - Core::Frontend::GraphicsContext::Scoped scoped; - ShaderPools pools; - }; - Common::StatefulThreadWorker workers( - std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", - [this] { return Context{emu_window}; }); - + if (!workers) { + workers = CreateWorkers(); + } struct { std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; + size_t total{}; + size_t built{}; + bool has_loaded{}; } state; const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { ComputePipelineKey key; file.read(reinterpret_cast(&key), sizeof(key)); - workers.QueueWork( + workers->QueueWork( [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { ctx->pools.ReleaseContents(); auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; @@ -347,7 +350,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, const auto load_graphics{[&](std::ifstream& file, std::vector envs) { GraphicsPipelineKey key; file.read(reinterpret_cast(&key), sizeof(key)); - workers.QueueWork( + workers->QueueWork( [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { boost::container::static_vector env_ptrs; for (auto& env : envs) { @@ -373,7 +376,10 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, state.has_loaded = true; lock.unlock(); - workers.WaitForRequests(); + workers->WaitForRequests(); + if (!use_asynchronous_shaders) { + workers.reset(); + } } GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { @@ -570,4 +576,11 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return nullptr; } +std::unique_ptr> ShaderCache::CreateWorkers() + const { + return std::make_unique>( + std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index cf74d34e4..e0c5a06d8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -12,6 +12,7 @@ #include #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -44,6 +45,8 @@ struct ShaderPools { }; class ShaderCache : public VideoCommon::ShaderCache { + struct Context; + public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -74,6 +77,8 @@ private: const ComputePipelineKey& key, Shader::Environment& env); + std::unique_ptr> CreateWorkers() const; + Core::Frontend::EmuWindow& emu_window; const Device& device; TextureCache& texture_cache; @@ -82,6 +87,7 @@ private: StateTracker& state_tracker; GraphicsPipelineKey graphics_key{}; + const bool use_asynchronous_shaders; ShaderPools main_pools; std::unordered_map> graphics_cache; @@ -89,6 +95,7 @@ private: Shader::Profile profile; std::filesystem::path shader_cache_filename; + std::unique_ptr> workers; }; } // namespace OpenGL -- cgit v1.2.3 From b1ed64ac18fe7b5fc89abe06442527d8c440ddc7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 01:28:22 -0300 Subject: gl_shader_util: Move shader utility code to a separate file --- .../renderer_opengl/gl_resource_manager.cpp | 27 ----- .../renderer_opengl/gl_resource_manager.h | 14 --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 81 +------------- src/video_core/renderer_opengl/gl_shader_util.cpp | 117 ++++++++++++++------- src/video_core/renderer_opengl/gl_shader_util.h | 89 ++-------------- src/video_core/renderer_opengl/renderer_opengl.cpp | 12 +-- src/video_core/renderer_opengl/util_shaders.cpp | 11 +- 7 files changed, 106 insertions(+), 245 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 3428e5e21..8695c29e3 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -83,18 +83,6 @@ void OGLSampler::Release() { handle = 0; } -void OGLShader::Create(std::string_view source, GLenum type) { - if (handle != 0) { - return; - } - if (source.empty()) { - return; - } - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - handle = GLShader::LoadShader(source, type); -} - void OGLShader::Release() { if (handle == 0) return; @@ -104,21 +92,6 @@ void OGLShader::Release() { handle = 0; } -void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader, - const char* frag_shader, bool separable_program, - bool hint_retrievable) { - OGLShader vert, geo, frag; - if (vert_shader) - vert.Create(vert_shader, GL_VERTEX_SHADER); - if (geo_shader) - geo.Create(geo_shader, GL_GEOMETRY_SHADER); - if (frag_shader) - frag.Create(frag_shader, GL_FRAGMENT_SHADER); - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle); -} - void OGLProgram::Release() { if (handle == 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 552d79db4..b2d5bfd3b 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -8,7 +8,6 @@ #include #include #include "common/common_types.h" -#include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { @@ -128,8 +127,6 @@ public: return *this; } - void Create(std::string_view source, GLenum type); - void Release(); GLuint handle = 0; @@ -151,17 +148,6 @@ public: return *this; } - template - void Create(bool separable_program, bool hint_retrievable, T... shaders) { - if (handle != 0) - return; - handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...); - } - - /// Creates a new internal OpenGL resource and stores the handle - void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, - bool separable_program = false, bool hint_retrievable = false); - /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9d6cef6e8..da0b36368 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -31,6 +31,7 @@ #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_cache.h" #include "video_core/shader_environment.h" @@ -53,77 +54,6 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -void AddShader(GLenum stage, GLuint program, std::span code) { - OGLShader shader; - shader.handle = glCreateShader(stage); - - glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), - static_cast(code.size_bytes())); - glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); - glAttachShader(program, shader.handle); - if (!Settings::values.renderer_debug) { - return; - } - GLint shader_status{}; - glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); - if (shader_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "Failed to build shader"); - } - GLint log_length{}; - glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data()); - if (shader_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } -} - -void LinkProgram(GLuint program) { - glLinkProgram(program); - if (!Settings::values.renderer_debug) { - return; - } - GLint link_status{}; - glGetProgramiv(program, GL_LINK_STATUS, &link_status); - - GLint log_length{}; - glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetProgramInfoLog(program, log_length, nullptr, log.data()); - if (link_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } -} - -OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { - OGLAssemblyProgram program; - glGenProgramsARB(1, &program.handle); - glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast(code.size()), code.data()); - if (Settings::values.renderer_debug) { - const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - if (std::strstr(err, "error")) { - LOG_CRITICAL(Render_OpenGL, "\n{}", err); - LOG_INFO(Render_OpenGL, "\n{}", code); - } else { - LOG_WARNING(Render_OpenGL, "\n{}", err); - } - } - } - return program; -} - GLenum Stage(size_t stage_index) { switch (stage_index) { case 0: @@ -492,9 +422,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( if (!device.UseAssemblyShaders()) { source_program.handle = glCreateProgram(); } - - for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; - ++index) { + const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; + for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -510,7 +439,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; - AddShader(Stage(stage_index), source_program.handle, code); + AttachShader(Stage(stage_index), source_program.handle, code); } } if (!device.UseAssemblyShaders()) { @@ -565,7 +494,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& } else { const std::vector code{EmitSPIRV(profile, program)}; source_program.handle = glCreateProgram(); - AddShader(GL_COMPUTE_SHADER, source_program.handle, code); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 4bf0d6090..99cb81819 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -5,57 +5,100 @@ #include #include #include + #include "common/assert.h" #include "common/logging/log.h" +#include "common/settings.h" #include "video_core/renderer_opengl/gl_shader_util.h" -namespace OpenGL::GLShader { - -namespace { +namespace OpenGL { -std::string_view StageDebugName(GLenum type) { - switch (type) { - case GL_VERTEX_SHADER: - return "vertex"; - case GL_GEOMETRY_SHADER: - return "geometry"; - case GL_FRAGMENT_SHADER: - return "fragment"; - case GL_COMPUTE_SHADER: - return "compute"; +static void LogShader(GLuint shader) { + GLint shader_status{}; + glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); } - UNIMPLEMENTED(); - return "unknown"; } -} // Anonymous namespace +void AttachShader(GLenum stage, GLuint program, std::string_view code) { + OGLShader shader; + shader.handle = glCreateShader(stage); -GLuint LoadShader(std::string_view source, GLenum type) { - const std::string_view debug_type = StageDebugName(type); - const GLuint shader_id = glCreateShader(type); + const GLint length = static_cast(code.size()); + const GLchar* const code_ptr = code.data(); + glShaderSource(shader.handle, 1, &code_ptr, &length); + glCompileShader(shader.handle); + glAttachShader(program, shader.handle); + if (Settings::values.renderer_debug) { + LogShader(shader.handle); + } +} - const GLchar* source_string = source.data(); - const GLint source_length = static_cast(source.size()); +void AttachShader(GLenum stage, GLuint program, std::span code) { + OGLShader shader; + shader.handle = glCreateShader(stage); - glShaderSource(shader_id, 1, &source_string, &source_length); - LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); - glCompileShader(shader_id); + glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + glAttachShader(program, shader.handle); + if (Settings::values.renderer_debug) { + LogShader(shader.handle); + } +} + +void LinkProgram(GLuint program) { + glLinkProgram(program); + if (!Settings::values.renderer_debug) { + return; + } + GLint link_status{}; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); - GLint result = GL_FALSE; - GLint info_log_length; - glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); + GLint log_length{}; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} - if (info_log_length > 1) { - std::string shader_error(info_log_length, ' '); - glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", shader_error); - } else { - LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { + OGLAssemblyProgram program; + glGenProgramsARB(1, &program.handle); + glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, + static_cast(code.size()), code.data()); + if (Settings::values.renderer_debug) { + const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + if (std::strstr(err, "error")) { + LOG_CRITICAL(Render_OpenGL, "\n{}", err); + LOG_INFO(Render_OpenGL, "\n{}", code); + } else { + LOG_WARNING(Render_OpenGL, "\n{}", err); + } } } - return shader_id; + return program; } -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 1b770532e..ff5aa024f 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -4,92 +4,25 @@ #pragma once +#include #include +#include #include + #include + #include "common/assert.h" #include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" -namespace OpenGL::GLShader { - -/** - * Utility function to log the source code of a list of shaders. - * @param shaders The OpenGL shaders whose source we will print. - */ -template -void LogShaderSource(T... shaders) { - auto shader_list = {shaders...}; - - for (const auto& shader : shader_list) { - if (shader == 0) - continue; - - GLint source_length; - glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length); - - std::string source(source_length, ' '); - glGetShaderSource(shader, source_length, nullptr, &source[0]); - LOG_INFO(Render_OpenGL, "Shader source {}", source); - } -} - -/** - * Utility function to create and compile an OpenGL GLSL shader - * @param source String of the GLSL shader program - * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) - */ -GLuint LoadShader(std::string_view source, GLenum type); - -/** - * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) - * @param separable_program whether to create a separable program - * @param shaders ID of shaders to attach to the program - * @returns Handle of the newly created OpenGL program object - */ -template -GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) { - // Link the program - LOG_DEBUG(Render_OpenGL, "Linking program..."); - - GLuint program_id = glCreateProgram(); - - ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...); - - if (separable_program) { - glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); - } - if (hint_retrievable) { - glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); - } - - glLinkProgram(program_id); - - // Check the program - GLint result = GL_FALSE; - GLint info_log_length; - glGetProgramiv(program_id, GL_LINK_STATUS, &result); - glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::string program_error(info_log_length, ' '); - glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", program_error); - } else { - LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); - } - } +namespace OpenGL { - if (result == GL_FALSE) { - // There was a problem linking the shader, print the source for debugging purposes. - LogShaderSource(shaders...); - } +void AttachShader(GLenum stage, GLuint program, std::string_view code); - ASSERT_MSG(result == GL_TRUE, "Shader not linked"); +void AttachShader(GLenum stage, GLuint program, std::span code); - ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...); +void LinkProgram(GLuint program); - return program_id; -} +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a4805f3da..b8777643b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,6 +24,7 @@ #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/textures/decoders.h" @@ -230,13 +231,10 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs - OGLShader vertex_shader; - vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - - OGLShader fragment_shader; - fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - - present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); + present_program.handle = glCreateProgram(); + AttachShader(GL_VERTEX_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_VERT); + AttachShader(GL_FRAGMENT_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_FRAG); + LinkProgram(present_program.handle); // Generate presentation sampler present_sampler.Create(); diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 51e72b705..8aa0683c8 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -17,6 +17,7 @@ #include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/accelerated_swizzle.h" @@ -40,13 +41,12 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; using VideoCore::Surface::BytesPerBlock; namespace { - OGLProgram MakeProgram(std::string_view source) { - OGLShader shader; - shader.Create(source, GL_COMPUTE_SHADER); - OGLProgram program; - program.Create(true, false, shader.handle); + OGLShader shader; + program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, program.handle, source); + LinkProgram(program.handle); return program; } @@ -54,7 +54,6 @@ size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { return static_cast(copy.extent.width * copy.extent.height * copy.src_subresource.num_layers); } - } // Anonymous namespace UtilShaders::UtilShaders(ProgramManager& program_manager_) -- cgit v1.2.3 From 7eaa74ad235b669608debaf3583af94bd675b6c6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 7 Jun 2021 20:43:00 -0300 Subject: gl_texture_cache: Create image storage views Fixes SULD.D tests. --- .../renderer_opengl/gl_compute_pipeline.cpp | 5 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 5 +- .../renderer_opengl/gl_texture_cache.cpp | 132 +++++++++++++++------ src/video_core/renderer_opengl/gl_texture_cache.h | 22 +++- 4 files changed, 126 insertions(+), 38 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 61b6fe4b7..a40106c87 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -178,7 +178,10 @@ void ComputePipeline::Configure() { for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); } } if (texture_binding != 0) { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a5d65fdca..a2ea35d5a 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -332,7 +332,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); } } }}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 7053be161..c373c9cb4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -328,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } +[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return GL_R8I; + case Shader::ImageFormat::R8_UINT: + return GL_R8UI; + case Shader::ImageFormat::R16_UINT: + return GL_R16UI; + case Shader::ImageFormat::R16_SINT: + return GL_R16I; + case Shader::ImageFormat::R32_UINT: + return GL_R32UI; + case Shader::ImageFormat::R32G32_UINT: + return GL_RG32UI; + case Shader::ImageFormat::R32G32B32A32_UINT: + return GL_RGBA32UI; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return GL_R32UI; +} } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -837,21 +859,28 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI } else { internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; } - VideoCommon::SubresourceRange flatten_range = info.range; - std::array handles; - stored_views.reserve(2); - + full_range = info.range; + flat_range = info.range; + set_object_label = device.HasDebuggingToolAttached(); + is_render_target = info.IsRenderTarget(); + original_texture = image.texture.handle; + num_samples = image.info.num_samples; + if (!is_render_target) { + swizzle[0] = info.x_source; + swizzle[1] = info.y_source; + swizzle[2] = info.z_source; + swizzle[3] = info.w_source; + } switch (info.type) { case ImageViewType::e1DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e1D: - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); + SetupView(Shader::TextureType::Color1D); + SetupView(Shader::TextureType::ColorArray1D); break; case ImageViewType::e2DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e2D: if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { @@ -861,26 +890,23 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .base = {.level = info.range.base.level, .layer = 0}, .extent = {.levels = 1, .layers = 1}, }; - glGenTextures(1, handles.data()); - SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); + full_range = slice_range; + + SetupView(Shader::TextureType::Color3D); } else { - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, - info.range); + SetupView(Shader::TextureType::Color2D); + SetupView(Shader::TextureType::ColorArray2D); } break; case ImageViewType::e3D: - glGenTextures(1, handles.data()); - SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); + SetupView(Shader::TextureType::Color3D); break; case ImageViewType::CubeArray: - flatten_range.extent.layers = 6; + flat_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); + SetupView(Shader::TextureType::ColorCube); + SetupView(Shader::TextureType::ColorArrayCube); break; case ImageViewType::Rect: UNIMPLEMENTED(); @@ -928,22 +954,62 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} -void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, - GLuint handle, const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range) { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, +GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique(); + } + auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds}; + GLuint& view{type_views[static_cast(texture_type)]}; + if (view == 0) { + view = MakeView(texture_type, ShaderFormat(image_format)); + } + return view; +} + +void ImageView::SetupView(Shader::TextureType view_type) { + views[static_cast(view_type)] = MakeView(view_type, internal_format); +} + +GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) { + VideoCommon::SubresourceRange view_range; + switch (view_type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Color2D: + case Shader::TextureType::ColorCube: + view_range = flat_range; + break; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::Color3D: + case Shader::TextureType::ColorArrayCube: + view_range = full_range; + break; + default: + UNREACHABLE(); + } + OGLTextureView& view = stored_views.emplace_back(); + view.Create(); + + const GLenum target = ImageTarget(view_type, num_samples); + glTextureView(view.handle, target, original_texture, view_format, view_range.base.level, view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if (!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); + if (!is_render_target) { + std::array casted_swizzle; + std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) { + return static_cast(component_swizzle); + }); + ApplySwizzle(view.handle, format, casted_swizzle); } - if (device.HasDebuggingToolAttached()) { + if (set_object_label) { const std::string name = VideoCommon::Name(*this); - glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); + glObjectLabel(GL_TEXTURE, view.handle, static_cast(name.size()), name.data()); } - stored_views.emplace_back().handle = handle; - views[static_cast(view_type)] = handle; + return view.handle; } Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 2e3e02b79..921072ebe 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -185,6 +185,9 @@ public: const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { return views[static_cast(handle_type)]; } @@ -206,16 +209,29 @@ public: } private: - void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, - const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range); + struct StorageViews { + std::array signeds{}; + std::array unsigneds{}; + }; + + void SetupView(Shader::TextureType view_type); + + GLuint MakeView(Shader::TextureType view_type, GLenum view_format); std::array views{}; std::vector stored_views; + std::unique_ptr storage_views; GLenum internal_format = GL_NONE; GLuint default_handle = 0; GPUVAddr gpu_addr = 0; u32 buffer_size = 0; + GLuint original_texture = 0; + int num_samples = 0; + VideoCommon::SubresourceRange flat_range; + VideoCommon::SubresourceRange full_range; + std::array swizzle{}; + bool set_object_label = false; + bool is_render_target = false; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; -- cgit v1.2.3 From 15bdd27cac4a0b1e6cd168272dc337cd685ef144 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 9 Jun 2021 23:33:48 -0400 Subject: shader_environment: Add shader_local_memory_crs_size to local memory size Fixes DOOM 2016 missing local memory --- src/shader_recompiler/program_header.h | 4 ++-- src/video_core/shader_environment.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h index 15f43f2d8..6933750aa 100644 --- a/src/shader_recompiler/program_header.h +++ b/src/shader_recompiler/program_header.h @@ -230,8 +230,8 @@ struct ProgramHeader { }; [[nodiscard]] u64 LocalMemorySize() const noexcept { - return (common1.shader_local_memory_low_size | - (common2.shader_local_memory_high_size << 24)); + return static_cast(common1.shader_local_memory_low_size) | + (static_cast(common2.shader_local_memory_high_size) << 24); } }; static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size"); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index c93174519..a7a57a36f 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -69,7 +69,7 @@ u32 GenericEnvironment::TextureBoundBuffer() const { } u32 GenericEnvironment::LocalMemorySize() const { - return local_memory_size; + return local_memory_size + sph.common3.shader_local_memory_crs_size; } u32 GenericEnvironment::SharedMemorySize() const { -- cgit v1.2.3 From 60a96c49e59e600685b9a79d80b2685318b4fb64 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 10 Jun 2021 02:24:12 -0300 Subject: buffer_cache: Fix copy based uniform bindings tracking --- src/video_core/buffer_cache/buffer_cache.h | 19 +++++++++++++++---- src/video_core/renderer_opengl/gl_buffer_cache.h | 12 +++++++----- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index ec64f2293..47cb0a47d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -680,6 +680,9 @@ void BufferCache

::SetUniformBuffersState(const std::array& m const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { if (enabled_uniform_buffer_masks != mask) { + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers.fill(0); + } dirty_uniform_buffers.fill(~u32{0}); } } @@ -1020,6 +1023,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // Fast path for Nvidia if (!HasFastUniformBufferBound(stage, binding_index)) { // We only have to bind when the currently bound buffer is not the fast version + fast_bound_uniform_buffers[stage] |= 1U << binding_index; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -1027,8 +1031,9 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 return; } } - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size); cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); @@ -1046,9 +1051,15 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // This exists to avoid instances where the fast buffer is bound and a GPU write happens return; } - fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); - const u32 offset = buffer.Offset(cpu_addr); + if constexpr (IS_OPENGL) { + // Fast buffer will be unbound + fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); + + // Mark the index as dirty if offset doesn't match + const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); + dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index bc16abafb..060d36427 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -92,16 +92,14 @@ public: VideoCore::Surface::PixelFormat format); void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { + const GLuint handle = fast_uniforms[stage][binding_index].handle; + const GLsizeiptr gl_size = static_cast(size); if (use_assembly_shaders) { - const GLuint handle = fast_uniforms[stage][binding_index].handle; - const GLsizeiptr gl_size = static_cast(size); glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); } else { const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - fast_uniforms[stage][binding_index].handle, 0, - static_cast(size)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size); } } @@ -134,6 +132,10 @@ public: return has_fast_buffer_sub_data; } + [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept { + return !use_assembly_shaders; + } + void SetBaseUniformBindings(const std::array& bindings) { graphics_base_uniform_bindings = bindings; } -- cgit v1.2.3 From 5befc0bf872058315c4f81bf58dcd173db2589fd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 10 Jun 2021 02:27:00 -0300 Subject: shader_environment: Fix local memory size calculations --- src/video_core/shader_environment.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index a7a57a36f..6243cd176 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -69,7 +69,7 @@ u32 GenericEnvironment::TextureBoundBuffer() const { } u32 GenericEnvironment::LocalMemorySize() const { - return local_memory_size + sph.common3.shader_local_memory_crs_size; + return local_memory_size; } u32 GenericEnvironment::SharedMemorySize() const { @@ -233,7 +233,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, } const u64 local_size{sph.LocalMemorySize()}; ASSERT(local_size <= std::numeric_limits::max()); - local_memory_size = static_cast(local_size); + local_memory_size = static_cast(local_size) + sph.common3.shader_local_memory_crs_size; texture_bound = maxwell3d->regs.tex_cb_index; } @@ -261,7 +261,7 @@ ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_com &kepler_compute_} { const auto& qmd{kepler_compute->launch_description}; stage = Shader::Stage::Compute; - local_memory_size = qmd.local_pos_alloc; + local_memory_size = qmd.local_pos_alloc + qmd.local_crs_alloc; texture_bound = kepler_compute->regs.tex_cb_index; shared_memory_size = qmd.shared_alloc; workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; -- cgit v1.2.3 From cd8427367ed372e355fa76a78d41b3bc64f997ca Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 10 Jun 2021 01:55:27 -0400 Subject: gl_buffer_cache: Use unorm internal formats for snorm texture buffer views Fixes black textures in UE4 games --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 25 +++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 334ed470f..0703614de 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -25,6 +25,25 @@ constexpr std::array PROGRAM_LUT{ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; + +[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) { + switch (gl_format) { + case GL_RGBA8_SNORM: + return GL_RGBA8; + case GL_R8_SNORM: + return GL_R8; + case GL_RGBA16_SNORM: + return GL_RGBA16; + case GL_R16_SNORM: + return GL_R16; + case GL_RG16_SNORM: + return GL_RG16; + case GL_RG8_SNORM: + return GL_RG8; + default: + return gl_format; + } +} } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) @@ -76,7 +95,11 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { OGLTexture texture; texture.Create(GL_TEXTURE_BUFFER); const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; - glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); + const GLenum texture_format{GetTextureBufferFormat(gl_format)}; + if (texture_format != gl_format) { + LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM."); + } + glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size); views.push_back({ .offset = offset, .size = size, -- cgit v1.2.3 From d554778311c32e0a19ecdc13d7525b264d8443b5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 21:52:04 -0300 Subject: vulkan: Use VK_EXT_provoking_vertex when available --- src/video_core/engines/maxwell_3d.h | 7 +++++- .../renderer_vulkan/fixed_pipeline_state.cpp | 4 ++-- .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 10 +++++++- src/video_core/vulkan_common/vulkan_device.cpp | 28 ++++++++++++++++++++++ src/video_core/vulkan_common/vulkan_device.h | 6 +++++ 6 files changed, 52 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cbf94412b..04d5790f6 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1151,7 +1151,11 @@ public: u32 index; } primitive_restart; - INSERT_PADDING_WORDS_NOINIT(0x5F); + INSERT_PADDING_WORDS_NOINIT(0xE); + + u32 provoking_vertex_last; + + INSERT_PADDING_WORDS_NOINIT(0x50); struct { u32 start_addr_high; @@ -1672,6 +1676,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(primitive_restart, 0x591); +ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 1486d088a..f121fbf0e 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -84,6 +84,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); depth_format.Assign(static_cast(regs.zeta.format)); + y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); + provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); @@ -91,8 +93,6 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 0f1eff9cd..60adae316 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -192,6 +192,7 @@ struct FixedPipelineState { BitField<4, 1, u32> depth_enabled; BitField<5, 5, u32> depth_format; BitField<10, 1, u32> y_negate; + BitField<11, 1, u32> provoking_vertex_last; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 5c916c869..06a80c2ba 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -567,9 +567,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { viewport_ci.pNext = &swizzle_ci; } + const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; const VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, + .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), @@ -586,6 +593,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2b715baba..618535aae 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -412,6 +412,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + if (ext_provoking_vertex) { + provoking_vertex = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, + .pNext = nullptr, + .provokingVertexLast = VK_TRUE, + .transformFeedbackPreservesProvokingVertex = VK_TRUE, + }; + SetNext(next, provoking_vertex); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); + } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; if (ext_shader_atomic_int64) { atomic_int64 = { @@ -718,6 +731,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_custom_border_color{}; bool has_ext_extended_dynamic_state{}; bool has_ext_shader_atomic_int64{}; + bool has_ext_provoking_vertex{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -748,6 +762,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); + test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); @@ -799,6 +814,19 @@ std::vector Device::LoadExtensions(bool requires_surface) { } else { is_warp_potentially_bigger = true; } + if (has_ext_provoking_vertex) { + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; + provoking_vertex.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT; + provoking_vertex.pNext = nullptr; + features.pNext = &provoking_vertex; + physical.GetFeatures2KHR(features); + + if (provoking_vertex.provokingVertexLast && + provoking_vertex.transformFeedbackPreservesProvokingVertex) { + extensions.push_back(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + ext_provoking_vertex = true; + } + } if (has_ext_shader_atomic_int64) { VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 9bc1fb947..37f589612 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -244,6 +244,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_provoking_vertex. + bool IsExtProvokingVertexSupported() const { + return ext_provoking_vertex; + } + /// Returns true if the device supports VK_KHR_shader_atomic_int64. bool IsExtShaderAtomicInt64Supported() const { return ext_shader_atomic_int64; @@ -346,6 +351,7 @@ private: bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached -- cgit v1.2.3 From 3025b2f605df74a129f0f47aadd4247055ecd6bd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 11 Jun 2021 21:53:38 -0300 Subject: vk_rasterizer: Implement first index --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d284b3653..e339e9739 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -54,6 +54,7 @@ struct DrawParams { u32 num_instances; u32 base_vertex; u32 num_vertices; + u32 first_index; bool is_indexed; }; @@ -103,6 +104,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan .num_instances = is_instanced ? num_instances : 1, .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, + .first_index = is_indexed ? regs.index_array.first : 0, .is_indexed = is_indexed, }; if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { @@ -173,8 +175,9 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { if (draw_params.is_indexed) { - cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, - draw_params.base_vertex, draw_params.base_instance); + cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, + draw_params.first_index, draw_params.base_vertex, + draw_params.base_instance); } else { cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, draw_params.base_vertex, draw_params.base_instance); -- cgit v1.2.3 From cb78a1b494be2f6bc0927ed5b7a878236a3dc1c0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 01:46:30 -0300 Subject: shader: Reorder shader cache directories --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 13 +++++-------- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 17 +++++++---------- 2 files changed, 12 insertions(+), 18 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index da0b36368..9391a4cd9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -238,16 +238,13 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, if (title_id == 0) { return; } - auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; - auto base_dir{shader_dir / "new_opengl"}; - auto transferable_dir{base_dir / "transferable"}; - auto precompiled_dir{base_dir / "precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories"); return; } - shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + shader_cache_filename = base_dir / "opengl.bin"; if (!workers) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e61d76490..6df4088a7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -337,22 +337,19 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading if (title_id == 0) { return; } - auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; - auto base_dir{shader_dir / "vulkan"}; - auto transferable_dir{base_dir / "transferable"}; - auto precompiled_dir{base_dir / "precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); return; } - pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + pipeline_cache_filename = base_dir / "vulkan.bin"; struct { std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; + size_t total{}; + size_t built{}; + bool has_loaded{}; } state; const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { -- cgit v1.2.3 From ea038d66538975319858f792052af1d0fa997fe3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 05:07:52 -0300 Subject: vulkan: Add VK_EXT_vertex_input_dynamic_state support Reduces the number of total pipelines generated on Vulkan. Tested on Super Smash Bros. Ultimate. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 67 ++++++++----- .../renderer_vulkan/fixed_pipeline_state.h | 73 ++++++++------ .../renderer_vulkan/vk_graphics_pipeline.cpp | 107 +++++++++++++-------- .../renderer_vulkan/vk_pipeline_cache.cpp | 29 +++++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 56 +++++++++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 2 + .../renderer_vulkan/vk_state_tracker.cpp | 50 ++++++---- src/video_core/renderer_vulkan/vk_state_tracker.h | 8 +- src/video_core/vulkan_common/vulkan_device.h | 6 ++ src/video_core/vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 8 ++ 11 files changed, 291 insertions(+), 116 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index f121fbf0e..16cef8711 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -50,7 +50,7 @@ void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& } // Anonymous namespace void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, - bool has_extended_dynamic_state) { + bool has_extended_dynamic_state, bool has_dynamic_vertex_input) { const Maxwell& regs = maxwell3d.regs; const std::array enabled_lut{ regs.polygon_offset_point_enable, @@ -60,7 +60,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const u32 topology_index = static_cast(regs.draw.topology.Value()); raw1 = 0; - no_extended_dynamic_state.Assign(has_extended_dynamic_state ? 0 : 1); + extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); + dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0); xfb_enabled.Assign(regs.tfb_enabled != 0); primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); @@ -73,11 +74,11 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); - rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); topology.Assign(regs.draw.topology); msaa_mode.Assign(regs.multisample_mode); raw2 = 0; + rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); const auto test_func = regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); @@ -93,24 +94,44 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { - maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); - binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; - } - } - if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { - maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& input = regs.vertex_attrib_format[index]; - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); - attribute.buffer.Assign(input.buffer); - attribute.offset.Assign(input.offset); - attribute.type.Assign(static_cast(input.type.Value())); - attribute.size.Assign(static_cast(input.size.Value())); + if (maxwell3d.dirty.flags[Dirty::VertexInput]) { + if (has_dynamic_vertex_input) { + // Dirty flag will be reset by the command buffer update + static constexpr std::array LUT{ + 0u, // Invalid + 1u, // SignedNorm + 1u, // UnsignedNorm + 2u, // SignedInt + 3u, // UnsignedInt + 1u, // UnsignedScaled + 1u, // SignedScaled + 1u, // Float + }; + const auto& attrs = regs.vertex_attrib_format; + attribute_types = 0; + for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { + const u32 mask = attrs[i].constant != 0 ? 0 : 3; + const u32 type = LUT[static_cast(attrs[i].type.Value())]; + attribute_types |= static_cast(type & mask) << (i * 2); + } + } else { + maxwell3d.dirty.flags[Dirty::VertexInput] = false; + enabled_divisors = 0; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); + binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; + enabled_divisors |= (is_enabled ? u64{1} : 0) << index; + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast(input.type.Value())); + attribute.size.Assign(static_cast(input.size.Value())); + } } } if (maxwell3d.dirty.flags[Dirty::Blending]) { @@ -126,10 +147,10 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, return static_cast(viewport.swizzle.raw); }); } - if (no_extended_dynamic_state != 0) { + if (!extended_dynamic_state) { dynamic_state.Refresh(regs); } - if (xfb_enabled != 0) { + if (xfb_enabled) { RefreshXfbState(xfb_state, regs); } } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 60adae316..04f34eb97 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -168,44 +168,51 @@ struct FixedPipelineState { union { u32 raw1; - BitField<0, 1, u32> no_extended_dynamic_state; - BitField<1, 1, u32> xfb_enabled; - BitField<2, 1, u32> primitive_restart_enable; - BitField<3, 1, u32> depth_bias_enable; - BitField<4, 1, u32> depth_clamp_disabled; - BitField<5, 1, u32> ndc_minus_one_to_one; - BitField<6, 2, u32> polygon_mode; - BitField<8, 5, u32> patch_control_points_minus_one; - BitField<13, 2, u32> tessellation_primitive; - BitField<15, 2, u32> tessellation_spacing; - BitField<17, 1, u32> tessellation_clockwise; - BitField<18, 1, u32> logic_op_enable; - BitField<19, 4, u32> logic_op; - BitField<23, 1, u32> rasterize_enable; + BitField<0, 1, u32> extended_dynamic_state; + BitField<1, 1, u32> dynamic_vertex_input; + BitField<2, 1, u32> xfb_enabled; + BitField<3, 1, u32> primitive_restart_enable; + BitField<4, 1, u32> depth_bias_enable; + BitField<5, 1, u32> depth_clamp_disabled; + BitField<6, 1, u32> ndc_minus_one_to_one; + BitField<7, 2, u32> polygon_mode; + BitField<9, 5, u32> patch_control_points_minus_one; + BitField<14, 2, u32> tessellation_primitive; + BitField<16, 2, u32> tessellation_spacing; + BitField<18, 1, u32> tessellation_clockwise; + BitField<19, 1, u32> logic_op_enable; + BitField<20, 4, u32> logic_op; BitField<24, 4, Maxwell::PrimitiveTopology> topology; BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; }; union { u32 raw2; - BitField<0, 3, u32> alpha_test_func; - BitField<3, 1, u32> early_z; - BitField<4, 1, u32> depth_enabled; - BitField<5, 5, u32> depth_format; - BitField<10, 1, u32> y_negate; - BitField<11, 1, u32> provoking_vertex_last; + BitField<0, 1, u32> rasterize_enable; + BitField<1, 3, u32> alpha_test_func; + BitField<4, 1, u32> early_z; + BitField<5, 1, u32> depth_enabled; + BitField<6, 5, u32> depth_format; + BitField<11, 1, u32> y_negate; + BitField<12, 1, u32> provoking_vertex_last; }; std::array color_formats; u32 alpha_test_ref; u32 point_size; - std::array binding_divisors; - std::array attributes; std::array attachments; std::array viewport_swizzles; + union { + u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state + u64 enabled_divisors; + }; + std::array attributes; + std::array binding_divisors; + DynamicState dynamic_state; VideoCommon::TransformFeedbackState xfb_state; - void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); + void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state, + bool has_dynamic_vertex_input); size_t Hash() const noexcept; @@ -216,16 +223,24 @@ struct FixedPipelineState { } size_t Size() const noexcept { - if (xfb_enabled != 0) { + if (xfb_enabled) { // When transform feedback is enabled, use the whole struct return sizeof(*this); - } else if (no_extended_dynamic_state != 0) { - // Dynamic state is enabled, we can enable more - return offsetof(FixedPipelineState, xfb_state); - } else { - // No XFB, extended dynamic state enabled + } + if (dynamic_vertex_input) { + // Exclude dynamic state and attributes + return offsetof(FixedPipelineState, attributes); + } + if (extended_dynamic_state) { + // Exclude dynamic state return offsetof(FixedPipelineState, dynamic_state); } + // Default + return offsetof(FixedPipelineState, xfb_state); + } + + u32 DynamicAttributeType(size_t index) const noexcept { + return (attribute_types >> (index * 2)) & 0b11; } }; static_assert(std::has_unique_object_representations_v); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 06a80c2ba..ccef71f4c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -472,39 +472,65 @@ void GraphicsPipeline::ConfigureDraw() { void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; - if (!device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { dynamic = key.state.dynamic_state; } static_vector vertex_bindings; static_vector vertex_binding_divisors; - for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = key.state.binding_divisors[index] != 0; - const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; - vertex_bindings.push_back({ - .binding = static_cast(index), - .stride = dynamic.vertex_strides[index], - .inputRate = rate, - }); - if (instanced) { - vertex_binding_divisors.push_back({ + static_vector vertex_attributes; + if (key.state.dynamic_vertex_input) { + const auto& input_attributes = stage_infos[0].input_generics; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const u32 type = key.state.DynamicAttributeType(index); + if (!input_attributes[index].used || type == 0) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = 0, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? VK_FORMAT_R32_SINT + : VK_FORMAT_R32_UINT, + .offset = 0, + }); + } + if (!vertex_attributes.empty()) { + vertex_bindings.push_back({ + .binding = 0, + .stride = 4, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }); + } + } else { + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool instanced = key.state.binding_divisors[index] != 0; + const auto rate = + instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vertex_bindings.push_back({ .binding = static_cast(index), - .divisor = key.state.binding_divisors[index], + .stride = dynamic.vertex_strides[index], + .inputRate = rate, }); + if (instanced) { + vertex_binding_divisors.push_back({ + .binding = static_cast(index), + .divisor = key.state.binding_divisors[index], + }); + } } - } - static_vector vertex_attributes; - const auto& input_attributes = stage_infos[0].input_generics; - for (size_t index = 0; index < key.state.attributes.size(); ++index) { - const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { - continue; + const auto& input_attributes = stage_infos[0].input_generics; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; + if (!attribute.enabled || !input_attributes[index].used) { + continue; + } + vertex_attributes.push_back({ + .location = static_cast(index), + .binding = attribute.buffer, + .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), + .offset = attribute.offset, + }); } - vertex_attributes.push_back({ - .location = static_cast(index), - .binding = attribute.buffer, - .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), - .offset = attribute.offset, - }); } VkPipelineVertexInputStateCreateInfo vertex_input_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, @@ -545,27 +571,25 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .flags = 0, .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; - VkPipelineViewportStateCreateInfo viewport_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = Maxwell::NumViewports, - .pViewports = nullptr, - .scissorCount = Maxwell::NumViewports, - .pScissors = nullptr, - }; + std::array swizzles; std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); - VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ + const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, .flags = 0, .viewportCount = Maxwell::NumViewports, .pViewportSwizzles = swizzles.data(), }; - if (device.IsNvViewportSwizzleSupported()) { - viewport_ci.pNext = &swizzle_ci; - } + const VkPipelineViewportStateCreateInfo viewport_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr, + .flags = 0, + .viewportCount = Maxwell::NumViewports, + .pViewports = nullptr, + .scissorCount = Maxwell::NumViewports, + .pScissors = nullptr, + }; const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, @@ -660,13 +684,13 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - static_vector dynamic_states{ + static_vector dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, }; - if (device.IsExtExtendedDynamicStateSupported()) { + if (key.state.extended_dynamic_state) { static constexpr std::array extended{ VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, @@ -678,6 +702,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_STENCIL_OP_EXT, }; + if (key.state.dynamic_vertex_input) { + dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT); + } dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); } const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6df4088a7..db7da5555 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -109,6 +109,20 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA return Shader::AttributeType::Float; } +Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) { + switch (state.DynamicAttributeType(index)) { + case 0: + return Shader::AttributeType::Disabled; + case 1: + return Shader::AttributeType::Float; + case 2: + return Shader::AttributeType::SignedInt; + case 3: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Disabled; +} + Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program) { Shader::RuntimeInfo info; @@ -123,13 +137,19 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, if (key.state.topology == Maxwell::PrimitiveTopology::Points) { info.fixed_state_point_size = point_size; } - if (key.state.xfb_enabled != 0) { + if (key.state.xfb_enabled) { info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); } info.convert_depth_mode = gl_ndc; } - std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), - &CastAttributeType); + if (key.state.dynamic_vertex_input) { + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + info.generic_input_types[index] = AttributeType(key.state, index); + } + } else { + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + } break; case Shader::Stage::TessellationEval: // We have to flip tessellation clockwise for some reason... @@ -298,7 +318,8 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { current_pipeline = nullptr; return nullptr; } - graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), + device.IsExtVertexInputDynamicStateSupported()); if (current_pipeline) { GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e339e9739..855c17769 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -551,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateFrontFace(regs); UpdateStencilOp(regs); UpdateStencilTestEnable(regs); + if (device.IsExtVertexInputDynamicStateSupported()) { + UpdateVertexInput(regs); + } } } @@ -780,4 +783,57 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& }); } +void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[Dirty::VertexInput]) { + return; + } + dirty[Dirty::VertexInput] = false; + + boost::container::static_vector bindings; + boost::container::static_vector attributes; + + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexAttribute0 + index]) { + continue; + } + const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; + const u32 binding{attribute.buffer}; + dirty[Dirty::VertexAttribute0 + index] = false; + dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; + + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast(index), + .binding = binding, + .format = attribute.IsConstant() + ? VK_FORMAT_A8B8G8R8_UNORM_PACK32 + : MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + if (!dirty[Dirty::VertexBinding0 + index]) { + continue; + } + dirty[Dirty::VertexBinding0 + index] = false; + + const u32 binding{static_cast(index)}; + const auto& input_binding{regs.vertex_array[binding]}; + const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)}; + bindings.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, + .pNext = nullptr, + .binding = binding, + .stride = input_binding.stride, + .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX, + .divisor = is_instanced ? input_binding.divisor : 1, + }); + } + scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) { + cmdbuf.SetVertexInputEXT(bindings, attributes); + }); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 1302bed02..c954fa7f8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -135,6 +135,8 @@ private: void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); + Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 956f86845..0ebe0473f 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, - StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, - DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, + Viewports, Scissors, DepthBias, BlendConstants, + DepthBounds, StencilProperties, CullMode, DepthBoundsEnable, + DepthTestEnable, DepthWriteEnable, DepthCompareOp, FrontFace, + StencilOp, StencilTestEnable, VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() { for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { flags[index] = true; } + for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) { + flags[index] = true; + } + for (int index = VertexBinding0; index <= VertexBinding31; ++index) { + flags[index] = true; + } return flags; } @@ -134,31 +141,38 @@ void SetupDirtyBlending(Tables& tables) { FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); } -void SetupDirtyInstanceDivisors(Tables& tables) { - static constexpr size_t divisor_offset = 3; - for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { - tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; - tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = - InstanceDivisors; +void SetupDirtyViewportSwizzles(Tables& tables) { + static constexpr size_t swizzle_offset = 6; + for (size_t index = 0; index < Regs::NumViewports; ++index) { + tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = + ViewportSwizzles; } } void SetupDirtyVertexAttributes(Tables& tables) { - FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); + for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) { + const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); + FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i); + } + FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput); } -void SetupDirtyViewportSwizzles(Tables& tables) { - static constexpr size_t swizzle_offset = 6; - for (size_t index = 0; index < Regs::NumViewports; ++index) { - tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = - ViewportSwizzles; +void SetupDirtyVertexBindings(Tables& tables) { + // Do NOT include stride here, it's implicit in VertexBuffer + static constexpr size_t divisor_offset = 3; + for (size_t i = 0; i < Regs::NumVertexArrays; ++i) { + const u8 flag = static_cast(VertexBinding0 + i); + tables[0][OFF(instanced_arrays) + i] = VertexInput; + tables[1][OFF(instanced_arrays) + i] = flag; + tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput; + tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag; } } } // Anonymous namespace StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { - auto& tables = gpu.Maxwell3D().dirty.tables; + auto& tables{gpu.Maxwell3D().dirty.tables}; SetupDirtyFlags(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); @@ -175,9 +189,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); SetupDirtyBlending(tables); - SetupDirtyInstanceDivisors(tables); - SetupDirtyVertexAttributes(tables); SetupDirtyViewportSwizzles(tables); + SetupDirtyVertexAttributes(tables); + SetupDirtyVertexBindings(tables); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 84e918a71..1976b7e9b 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -19,6 +19,12 @@ namespace Dirty { enum : u8 { First = VideoCommon::Dirty::LastCommonEntry, + VertexInput, + VertexAttribute0, + VertexAttribute31 = VertexAttribute0 + 31, + VertexBinding0, + VertexBinding31 = VertexBinding0 + 31, + Viewports, Scissors, DepthBias, @@ -36,8 +42,6 @@ enum : u8 { StencilTestEnable, Blending, - InstanceDivisors, - VertexAttributes, ViewportSwizzles, Last diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 37f589612..4fda472b0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -239,6 +239,11 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. + bool IsExtVertexInputDynamicStateSupported() const { + return ext_vertex_input_dynamic_state; + } + /// Returns true if the device supports VK_EXT_shader_stencil_export. bool IsExtShaderStencilExportSupported() const { return ext_shader_stencil_export; @@ -349,6 +354,7 @@ private: bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 33fb74bfb..7e13ae8af 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -123,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); + X(vkCmdSetVertexInputEXT); X(vkCmdResolveImage); X(vkCreateBuffer); X(vkCreateBufferView); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 3e36d356a..6e5be1186 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -238,6 +238,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; + PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; PFN_vkCmdResolveImage vkCmdResolveImage{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{}; @@ -1203,6 +1204,13 @@ public: dld->vkCmdSetStencilTestEnableEXT(handle, enable ? VK_TRUE : VK_FALSE); } + void SetVertexInputEXT( + vk::Span bindings, + vk::Span attributes) const noexcept { + dld->vkCmdSetVertexInputEXT(handle, bindings.size(), bindings.data(), attributes.size(), + attributes.data()); + } + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, const VkDeviceSize* offsets, const VkDeviceSize* sizes) const noexcept { -- cgit v1.2.3 From 41cca8b8ad6f7ea33e74210aee4e3867ffa0622e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 23:24:49 -0300 Subject: vk_pipeline_cache: Skip cached pipelines with different dynamic state --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index db7da5555..b17f34cdd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -391,10 +391,16 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading }); ++state.total; }}; + const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(); + const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(); const auto load_graphics{[&](std::ifstream& file, std::vector envs) { GraphicsPipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); + if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state || + (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) { + return; + } workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; boost::container::static_vector env_ptrs; -- cgit v1.2.3 From ba3bdf1d4156fa6fd257305406a3b88f0c288006 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 23:25:40 -0300 Subject: vulkan_device: Enable VK_EXT_vertex_input_dynamic_state --- src/video_core/vulkan_common/vulkan_device.cpp | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 618535aae..8eb37a77a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -425,6 +425,18 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support provoking vertex last"); } + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input_dynamic; + if (ext_vertex_input_dynamic_state) { + vertex_input_dynamic = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT, + .pNext = nullptr, + .vertexInputDynamicState = VK_TRUE, + }; + SetNext(next, vertex_input_dynamic); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support vertex input dynamic state"); + } + VkPhysicalDeviceShaderAtomicInt64FeaturesKHR atomic_int64; if (ext_shader_atomic_int64) { atomic_int64 = { @@ -732,6 +744,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_extended_dynamic_state{}; bool has_ext_shader_atomic_int64{}; bool has_ext_provoking_vertex{}; + bool has_ext_vertex_input_dynamic_state{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -763,6 +776,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); test(has_ext_provoking_vertex, VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME, false); + test(has_ext_vertex_input_dynamic_state, VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME, + false); test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); @@ -827,6 +842,19 @@ std::vector Device::LoadExtensions(bool requires_surface) { ext_provoking_vertex = true; } } + if (has_ext_vertex_input_dynamic_state) { + VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT vertex_input; + vertex_input.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT; + vertex_input.pNext = nullptr; + features.pNext = &vertex_input; + physical.GetFeatures2KHR(features); + + if (vertex_input.vertexInputDynamicState) { + extensions.push_back(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + ext_vertex_input_dynamic_state = true; + } + } if (has_ext_shader_atomic_int64) { VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; -- cgit v1.2.3 From 373f75d944473731408d7a72c967d5c4b37af5bb Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:22:31 -0400 Subject: shader: Add shader loop safety check settings Also add a setting for enable Nsight Aftermath. --- src/common/settings.h | 3 + .../backend/glasm/emit_glasm_instructions.h | 2 + .../backend/glasm/emit_glasm_not_implemented.cpp | 8 +++ .../backend/spirv/emit_spirv_context_get_set.cpp | 24 +++++--- .../backend/spirv/emit_spirv_instructions.h | 2 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 14 ++++- src/shader_recompiler/frontend/ir/ir_emitter.h | 3 + src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../frontend/maxwell/structured_control_flow.cpp | 42 ++++++++++++-- src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | 66 +++++++++++++++------- src/video_core/vulkan_common/vulkan_device.cpp | 4 +- src/yuzu/configuration/config.cpp | 4 ++ src/yuzu/configuration/configure_debug.cpp | 8 +++ src/yuzu/configuration/configure_debug.ui | 26 +++++++++ src/yuzu_cmd/config.cpp | 2 + src/yuzu_cmd/default_ini.h | 8 +++ 16 files changed, 183 insertions(+), 35 deletions(-) (limited to 'src/video_core') diff --git a/src/common/settings.h b/src/common/settings.h index ce1bc647d..ac0590690 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -308,6 +308,9 @@ struct Values { // Renderer Setting renderer_backend{RendererBackend::OpenGL, "backend"}; BasicSetting renderer_debug{false, "debug"}; + BasicSetting enable_nsight_aftermath{false, "nsight_aftermath"}; + BasicSetting disable_shader_loop_safety_checks{false, + "disable_shader_loop_safety_checks"}; Setting vulkan_device{0, "vulkan_device"}; Setting resolution_factor{1, "resolution_factor"}; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index c9f4826ce..fef9ff9be 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -42,6 +42,8 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitSetLoopSafetyVariable(EmitContext& ctx); +void EmitGetLoopSafetyVariable(EmitContext& ctx); void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp index 95bcbd750..60735fe31 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp @@ -153,6 +153,14 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } +void EmitSetLoopSafetyVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetLoopSafetyVariable(EmitContext& ctx) { + NotImplemented(); +} + void EmitGetZFlag(EmitContext& ctx) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 442a958a5..42fff74e3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -163,35 +163,43 @@ Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 inde } // Anonymous namespace void EmitGetRegister(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetRegister(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetPred(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetPred(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetGotoVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetGotoVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitSetIndirectBranchVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); } void EmitGetIndirectBranchVariable(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); + throw LogicError("Unreachable instruction"); +} + +void EmitSetLoopSafetyVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); +} + +void EmitGetLoopSafetyVariable(EmitContext&) { + throw LogicError("Unreachable instruction"); } Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 1181e7b4f..e3e5b03fe 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -43,6 +43,8 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitSetLoopSafetyVariable(EmitContext& ctx); +void EmitGetLoopSafetyVariable(EmitContext& ctx); Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index e9fd41237..6c37af5e7 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -125,6 +125,12 @@ U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { } } +void IREmitter::SetPred(IR::Pred pred, const U1& value) { + if (pred != IR::Pred::PT) { + Inst(Opcode::SetPred, pred, value); + } +} + U1 IREmitter::GetGotoVariable(u32 id) { return Inst(Opcode::GetGotoVariable, id); } @@ -141,8 +147,12 @@ void IREmitter::SetIndirectBranchVariable(const U32& value) { Inst(Opcode::SetIndirectBranchVariable, value); } -void IREmitter::SetPred(IR::Pred pred, const U1& value) { - Inst(Opcode::SetPred, pred, value); +U32 IREmitter::GetLoopSafetyVariable(u32 id) { + return Inst(Opcode::GetLoopSafetyVariable, id); +} + +void IREmitter::SetLoopSafetyVariable(u32 id, const U32& counter) { + Inst(Opcode::SetLoopSafetyVariable, id, counter); } U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index bb3500c54..7caab1f61 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -55,6 +55,9 @@ public: [[nodiscard]] U32 GetIndirectBranchVariable(); void SetIndirectBranchVariable(const U32& value); + [[nodiscard]] U32 GetLoopSafetyVariable(u32 id); + void SetLoopSafetyVariable(u32 id, const U32& counter); + [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, bool is_signed); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8a8d0d759..e87aeddd5 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -32,6 +32,8 @@ OPCODE(GetGotoVariable, U1, U32, OPCODE(SetGotoVariable, Void, U32, U1, ) OPCODE(GetIndirectBranchVariable, U32, ) OPCODE(SetIndirectBranchVariable, Void, U32, ) +OPCODE(GetLoopSafetyVariable, U32, U32, ) +OPCODE(SetLoopSafetyVariable, Void, U32, U32, ) OPCODE(GetCbufU8, U32, U32, U32, ) OPCODE(GetCbufS8, U32, U32, U32, ) OPCODE(GetCbufU16, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index c1e0646e6..b2b8c492a 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -9,11 +9,13 @@ #include #include #include +#include #include #include +#include "common/settings.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/ir_emitter.h" @@ -739,8 +741,25 @@ private: } case StatementType::Loop: { IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; - if (current_block) { - current_block->AddBranch(loop_header_block); + const u32 this_loop_id{loop_id++}; + + if (Settings::values.disable_shader_loop_safety_checks) { + if (current_block) { + current_block->AddBranch(loop_header_block); + } + } else { + IR::Block* const init_block{block_pool.Create(inst_pool)}; + IR::IREmitter ir{*init_block}; + ir.SetLoopSafetyVariable(this_loop_id, ir.Imm32(0x2000)); + + if (current_block) { + current_block->AddBranch(init_block); + } + init_block->AddBranch(loop_header_block); + + auto& init_node{syntax_list.emplace_back()}; + init_node.type = IR::AbstractSyntaxNode::Type::Block; + init_node.data.block = init_block; } auto& header_node{syntax_list.emplace_back()}; header_node.type = IR::AbstractSyntaxNode::Type::Block; @@ -758,7 +777,16 @@ private: // The continue block is located at the end of the loop IR::IREmitter ir{*continue_block}; - const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; + IR::U1 cond{VisitExpr(ir, *stmt.cond)}; + if (!Settings::values.disable_shader_loop_safety_checks) { + const IR::U32 old_counter{ir.GetLoopSafetyVariable(this_loop_id)}; + const IR::U32 new_counter{ir.ISub(old_counter, ir.Imm32(1))}; + ir.SetLoopSafetyVariable(this_loop_id, new_counter); + + const IR::U1 safety_cond{ir.INotEqual(new_counter, ir.Imm32(0))}; + cond = ir.LogicalAnd(cond, safety_cond); + } + cond = ir.ConditionRef(cond); IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; loop_header_block->AddBranch(body_block); @@ -863,8 +891,14 @@ private: ObjectPool& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; - // TODO: Make this constexpr when std::vector is constexpr + u32 loop_id{}; + +// TODO: C++20 Remove this when all compilers support constexpr std::vector +#if __cpp_lib_constexpr_vector >= 201907 + static constexpr Flow::Block dummy_flow_block; +#else const Flow::Block dummy_flow_block; +#endif }; } // Anonymous namespace diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp index e54499ba5..a4ba393ef 100644 --- a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp @@ -48,73 +48,91 @@ struct GotoVariable : FlagTag { u32 index; }; +struct LoopSafetyVariable { + LoopSafetyVariable() = default; + explicit LoopSafetyVariable(u32 index_) : index{index_} {} + + auto operator<=>(const LoopSafetyVariable&) const noexcept = default; + + u32 index; +}; + struct IndirectBranchVariable { auto operator<=>(const IndirectBranchVariable&) const noexcept = default; }; -using Variant = std::variant; -using ValueMap = boost::container::flat_map>; +using Variant = + std::variant; +using ValueMap = boost::container::flat_map; struct DefTable { - const IR::Value& Def(IR::Block* block, IR::Reg variable) noexcept { + const IR::Value& Def(IR::Block* block, IR::Reg variable) { return block->SsaRegValue(variable); } - void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) { block->SetSsaRegValue(variable, value); } - const IR::Value& Def(IR::Block* block, IR::Pred variable) noexcept { + const IR::Value& Def(IR::Block* block, IR::Pred variable) { return preds[IR::PredIndex(variable)][block]; } - void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) { preds[IR::PredIndex(variable)].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, GotoVariable variable) noexcept { + const IR::Value& Def(IR::Block* block, GotoVariable variable) { return goto_vars[variable.index][block]; } - void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) { goto_vars[variable.index].insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, IndirectBranchVariable) noexcept { + const IR::Value& Def(IR::Block* block, LoopSafetyVariable variable) { + return loop_safety_vars[variable.index][block]; + } + void SetDef(IR::Block* block, LoopSafetyVariable variable, const IR::Value& value) { + loop_safety_vars[variable.index].insert_or_assign(block, value); + } + + const IR::Value& Def(IR::Block* block, IndirectBranchVariable) { return indirect_branch_var[block]; } - void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) { indirect_branch_var.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, ZeroFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, ZeroFlagTag) { return zero_flag[block]; } - void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) { zero_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, SignFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, SignFlagTag) { return sign_flag[block]; } - void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) { sign_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, CarryFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, CarryFlagTag) { return carry_flag[block]; } - void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) { carry_flag.insert_or_assign(block, value); } - const IR::Value& Def(IR::Block* block, OverflowFlagTag) noexcept { + const IR::Value& Def(IR::Block* block, OverflowFlagTag) { return overflow_flag[block]; } - void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) noexcept { + void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) { overflow_flag.insert_or_assign(block, value); } std::array preds; boost::container::flat_map goto_vars; + boost::container::flat_map loop_safety_vars; ValueMap indirect_branch_var; ValueMap zero_flag; ValueMap sign_flag; @@ -134,6 +152,10 @@ IR::Opcode UndefOpcode(const FlagTag&) noexcept { return IR::Opcode::UndefU1; } +IR::Opcode UndefOpcode(const LoopSafetyVariable&) noexcept { + return IR::Opcode::UndefU32; +} + IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { return IR::Opcode::UndefU32; } @@ -315,6 +337,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::SetGotoVariable: pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); break; + case IR::Opcode::SetLoopSafetyVariable: + pass.WriteVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block, inst.Arg(0)); + break; case IR::Opcode::SetIndirectBranchVariable: pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); break; @@ -343,6 +368,9 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetGotoVariable: inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); break; + case IR::Opcode::GetLoopSafetyVariable: + inst.ReplaceUsesWith(pass.ReadVariable(LoopSafetyVariable{inst.Arg(0).U32()}, block)); + break; case IR::Opcode::GetIndirectBranchVariable: inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); break; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8eb37a77a..bf063c047 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -467,7 +467,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } VkDeviceDiagnosticsConfigCreateInfoNV diagnostics_nv; - if (nv_device_diagnostics_config) { + if (Settings::values.enable_nsight_aftermath && nv_device_diagnostics_config) { nsight_aftermath_tracker = std::make_unique(); diagnostics_nv = { @@ -781,7 +781,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); - if (Settings::values.renderer_debug) { + if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index a5e032959..dc69574a9 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -824,6 +824,8 @@ void Config::ReadRendererValues() { if (global) { ReadBasicSetting(Settings::values.renderer_debug); + ReadBasicSetting(Settings::values.enable_nsight_aftermath); + ReadBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); @@ -1353,6 +1355,8 @@ void Config::SaveRendererValues() { if (global) { WriteBasicSetting(Settings::values.renderer_debug); + WriteBasicSetting(Settings::values.enable_nsight_aftermath); + WriteBasicSetting(Settings::values.disable_shader_loop_safety_checks); } qt_config->endGroup(); diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index 8fceb3878..f7e29dbd7 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -45,8 +45,13 @@ void ConfigureDebug::SetConfiguration() { ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug.GetValue()); ui->enable_cpu_debugging->setEnabled(runtime_lock); ui->enable_cpu_debugging->setChecked(Settings::values.cpu_debug_mode.GetValue()); + ui->enable_nsight_aftermath->setEnabled(runtime_lock); + ui->enable_nsight_aftermath->setChecked(Settings::values.enable_nsight_aftermath.GetValue()); ui->disable_macro_jit->setEnabled(runtime_lock); ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit.GetValue()); + ui->disable_loop_safety_checks->setEnabled(runtime_lock); + ui->disable_loop_safety_checks->setChecked( + Settings::values.disable_shader_loop_safety_checks.GetValue()); ui->extended_logging->setChecked(Settings::values.extended_logging.GetValue()); } @@ -61,6 +66,9 @@ void ConfigureDebug::ApplyConfiguration() { Settings::values.use_auto_stub = ui->use_auto_stub->isChecked(); Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); Settings::values.cpu_debug_mode = ui->enable_cpu_debugging->isChecked(); + Settings::values.enable_nsight_aftermath = ui->enable_nsight_aftermath->isChecked(); + Settings::values.disable_shader_loop_safety_checks = + ui->disable_loop_safety_checks->isChecked(); Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); Settings::values.extended_logging = ui->extended_logging->isChecked(); Debugger::ToggleConsole(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index 1260ad6f0..c8baf2921 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -125,6 +125,16 @@ + + + + When checked, it enables Nsight Aftermath crash dumps + + + Enable Nsight Aftermath + + + @@ -138,6 +148,16 @@ + + + + When checked, it executes shaders without loop logic changes + + + Disable Loop safety checks + + + @@ -252,11 +272,17 @@ log_filter_edit toggle_console + extended_logging open_log_button homebrew_args_edit enable_graphics_debugging + enable_nsight_aftermath + disable_macro_jit + disable_loop_safety_checks reporting_services quest_flag + use_debug_asserts + use_auto_stub diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 3e22fee37..763df6dd6 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -444,6 +444,8 @@ void Config::ReadValues() { // Renderer ReadSetting("Renderer", Settings::values.renderer_backend); ReadSetting("Renderer", Settings::values.renderer_debug); + ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); + ReadSetting("Renderer", Settings::values.disable_shader_loop_safety_checks); ReadSetting("Renderer", Settings::values.vulkan_device); ReadSetting("Renderer", Settings::values.fullscreen_mode); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 88d33ecab..a6ca7b6cd 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -221,6 +221,14 @@ backend = # 0 (default): Disabled, 1: Enabled debug = +# Enable Nsight Aftermath crash dumps +# 0 (default): Disabled, 1: Enabled +nsight_aftermath = + +# Disable shader loop safety checks, executing the shader without loop logic changes +# 0 (default): Disabled, 1: Enabled +disable_shader_loop_safety_checks = + # Which Vulkan physical device to use (defaults to 0) vulkan_device = -- cgit v1.2.3 From 94e751f415d70fe255eada77c4385ec966c07a95 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 14 Jun 2021 04:32:45 -0300 Subject: buffer_cache: Invalidate fast buffers on compute --- src/video_core/buffer_cache/buffer_cache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 47cb0a47d..d004199ba 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1132,6 +1132,7 @@ void BufferCache

::BindHostComputeUniformBuffers() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { // Mark all uniform buffers as dirty dirty_uniform_buffers.fill(~u32{0}); + fast_bound_uniform_buffers.fill(0); } u32 binding_index = 0; ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { -- cgit v1.2.3 From dbf7cb9f90a99faa6d6ab07558d65dd113728ff1 Mon Sep 17 00:00:00 2001 From: Rodrigo Locatti Date: Mon, 14 Jun 2021 22:02:42 -0300 Subject: vk_graphics_pipeline: Fix path with no VK_EXT_extended_dynamic_state --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ccef71f4c..e02b1b7ab 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -472,7 +472,7 @@ void GraphicsPipeline::ConfigureDraw() { void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; - if (key.state.extended_dynamic_state) { + if (!key.state.extended_dynamic_state) { dynamic = key.state.dynamic_state; } static_vector vertex_bindings; -- cgit v1.2.3 From 8fb204893430de2d5c30e008e98db313f890f447 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 02:43:01 -0300 Subject: vk_rasterizer: Exit render passes on fragment barriers --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 855c17769..c57e16c50 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -440,6 +440,7 @@ void RasterizerVulkan::WaitForIdle() { void RasterizerVulkan::FragmentBarrier() { // We already put barriers when a render pass finishes + scheduler.RequestOutsideRenderPassOperationContext(); } void RasterizerVulkan::TiledCacheBarrier() { -- cgit v1.2.3 From eaff1030de07f3739794207403ea833ee91c0034 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 19 May 2021 21:58:32 -0400 Subject: glsl: Initial backend --- src/shader_recompiler/CMakeLists.txt | 26 + .../backend/glsl/emit_context.cpp | 30 + src/shader_recompiler/backend/glsl/emit_context.h | 62 + src/shader_recompiler/backend/glsl/emit_glsl.cpp | 156 ++ src/shader_recompiler/backend/glsl/emit_glsl.h | 23 + .../backend/glsl/emit_glsl_atomic.cpp | 0 .../backend/glsl/emit_glsl_barriers.cpp | 0 .../backend/glsl/emit_glsl_bitwise_conversion.cpp | 0 .../backend/glsl/emit_glsl_composite.cpp | 0 .../backend/glsl/emit_glsl_context_get_set.cpp | 48 + .../backend/glsl/emit_glsl_control_flow.cpp | 0 .../backend/glsl/emit_glsl_convert.cpp | 0 .../backend/glsl/emit_glsl_floating_point.cpp | 0 .../backend/glsl/emit_glsl_image.cpp | 0 .../backend/glsl/emit_glsl_image_atomic.cpp | 0 .../backend/glsl/emit_glsl_instructions.h | 656 ++++++ .../backend/glsl/emit_glsl_integer.cpp | 0 .../backend/glsl/emit_glsl_logical.cpp | 0 .../backend/glsl/emit_glsl_memory.cpp | 0 .../backend/glsl/emit_glsl_not_implemented.cpp | 2149 ++++++++++++++++++++ .../backend/glsl/emit_glsl_select.cpp | 0 .../backend/glsl/emit_glsl_shared_memory.cpp | 0 .../backend/glsl/emit_glsl_special.cpp | 0 .../backend/glsl/emit_glsl_undefined.cpp | 0 .../backend/glsl/emit_glsl_warp.cpp | 0 src/shader_recompiler/backend/glsl/reg_alloc.cpp | 96 + src/shader_recompiler/backend/glsl/reg_alloc.h | 46 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 +- 28 files changed, 3297 insertions(+), 2 deletions(-) create mode 100644 src/shader_recompiler/backend/glsl/emit_context.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_context.h create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl.h create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_image.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_instructions.h create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_select.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_special.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp create mode 100644 src/shader_recompiler/backend/glsl/reg_alloc.cpp create mode 100644 src/shader_recompiler/backend/glsl/reg_alloc.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index d6d8e5f59..9b2240931 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -24,6 +24,32 @@ add_library(shader_recompiler STATIC backend/glasm/emit_glasm_warp.cpp backend/glasm/reg_alloc.cpp backend/glasm/reg_alloc.h + backend/glsl/emit_context.cpp + backend/glsl/emit_context.h + backend/glsl/emit_glsl.cpp + backend/glsl/emit_glsl.h + backend/glsl/emit_glsl_atomic.cpp + backend/glsl/emit_glsl_barriers.cpp + backend/glsl/emit_glsl_bitwise_conversion.cpp + backend/glsl/emit_glsl_composite.cpp + backend/glsl/emit_glsl_context_get_set.cpp + backend/glsl/emit_glsl_control_flow.cpp + backend/glsl/emit_glsl_convert.cpp + backend/glsl/emit_glsl_floating_point.cpp + backend/glsl/emit_glsl_image.cpp + backend/glsl/emit_glsl_image_atomic.cpp + backend/glsl/emit_glsl_instructions.h + backend/glsl/emit_glsl_integer.cpp + backend/glsl/emit_glsl_logical.cpp + backend/glsl/emit_glsl_memory.cpp + backend/glsl/emit_glsl_not_implemented.cpp + backend/glsl/emit_glsl_select.cpp + backend/glsl/emit_glsl_shared_memory.cpp + backend/glsl/emit_glsl_special.cpp + backend/glsl/emit_glsl_undefined.cpp + backend/glsl/emit_glsl_warp.cpp + backend/glsl/reg_alloc.cpp + backend/glsl/reg_alloc.h backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp new file mode 100644 index 000000000..e2a9885f0 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::Backend::GLSL { + +EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindings, + const Profile& profile_) + : info{program.info}, profile{profile_} { + std::string header = "#version 450 core\n"; + header += "layout(local_size_x=1, local_size_y=1, local_size_z=1) in;"; + code += header; + DefineConstantBuffers(); + code += "void main(){"; +} + +void EmitContext::DefineConstantBuffers() { + if (info.constant_buffer_descriptors.empty()) { + return; + } + for (const auto& desc : info.constant_buffer_descriptors) { + Add("uniform uint c{}[{}];", desc.index, desc.count); + } +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h new file mode 100644 index 000000000..ffc97007d --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -0,0 +1,62 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "shader_recompiler/backend/glsl/reg_alloc.h" +#include "shader_recompiler/stage.h" + +namespace Shader { +struct Info; +struct Profile; +} // namespace Shader + +namespace Shader::Backend { +struct Bindings; +} + +namespace Shader::IR { +class Inst; +struct Program; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +class EmitContext { +public: + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_); + + template + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + code += fmt::format(format_str, reg_alloc.Define(inst), std::forward(args)...); + // TODO: Remove this + code += '\n'; + } + + template + void Add(const char* format_str, Args&&... args) { + code += fmt::format(format_str, std::forward(args)...); + // TODO: Remove this + code += '\n'; + } + + std::string AllocVar() { + return fmt::format("var_{}", var_num++); + } + + std::string code; + RegAlloc reg_alloc; + const Info& info; + const Profile& profile; + u64 var_num{}; + +private: + void DefineConstantBuffers(); +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp new file mode 100644 index 000000000..bb1d8b272 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -0,0 +1,156 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" + +#pragma optimize("", off) +namespace Shader::Backend::GLSL { +namespace { +template +struct FuncTraits {}; + +template +struct FuncTraits { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template + using ArgType = std::tuple_element_t>; +}; + +template +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { + inst->SetDefinition(func(ctx, std::forward(args)...)); +} + +template +ArgType Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v) { + return ctx.reg_alloc.Consume(arg); + } else if constexpr (std::is_same_v) { + return *arg.Inst(); + } else if constexpr (std::is_same_v) { + return arg; + } else if constexpr (std::is_same_v) { + return arg.U32(); + } else if constexpr (std::is_same_v) { + return arg.Attribute(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); + } else if constexpr (std::is_same_v) { + return arg.Reg(); + } +} + +template +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { + using Traits = FuncTraits; + if constexpr (std::is_same_v) { + if constexpr (is_first_arg_inst) { + SetDefinition( + ctx, inst, inst, + Arg>(ctx, inst->Arg(I))...); + } else { + SetDefinition( + ctx, inst, Arg>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + func(ctx, inst, Arg>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg>(ctx, inst->Arg(I))...); + } + } +} + +template +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke(ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v; + using Indices = std::make_index_sequence; + Invoke(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +void EmitCode(EmitContext& ctx, const IR::Program& program) { + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("if ("); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("){{"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("while ("); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + if (node.data.repeat.cond.IsImmediate()) { + if (node.data.repeat.cond.U1()) { + ctx.Add("ENDREP;"); + } else { + ctx.Add("BRK;" + "ENDREP;"); + } + } + break; + case IR::AbstractSyntaxNode::Type::Break: + if (node.data.break_node.cond.IsImmediate()) { + if (node.data.break_node.cond.U1()) { + ctx.Add("break;"); + } + } + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("return;"); + break; + default: + ctx.Add("UNAHNDLED {}", node.type); + break; + } + } +} + +} // Anonymous namespace + +std::string EmitGLSL(const Profile& profile, IR::Program& program, Bindings& bindings) { + EmitContext ctx{program, bindings, profile}; + // ctx.SetupBuffers(); + EmitCode(ctx, program); + ctx.code += "}"; + return ctx.code; +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h new file mode 100644 index 000000000..a7c666107 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { + +[[nodiscard]] std::string EmitGLSL(const Profile& profile, IR::Program& program, + Bindings& binding); + +[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitGLSL(profile, program, binding); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp new file mode 100644 index 000000000..7b2ed358e --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -0,0 +1,48 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + const auto var{ctx.AllocVar()}; + ctx.Add("uint {} = c{}[{}];", var, binding.U32(), offset.U32()); +} + +void EmitGetCbufF32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufU32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h new file mode 100644 index 000000000..1d86820dc --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -0,0 +1,656 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +class EmitContext; + +inline void EmitSetLoopSafetyVariable(EmitContext&) {} +inline void EmitGetLoopSafetyVariable(EmitContext&) {} + +// Microinstruction emitters +void EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +void EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitReference(EmitContext&); +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); +void EmitBranch(EmitContext& ctx, std::string_view label); +void EmitBranchConditional(EmitContext& ctx, std::string_view condition, + std::string_view true_label, std::string_view false_label); +void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, std::string_view continue_label); +void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label); +void EmitReturn(EmitContext& ctx); +void EmitJoin(EmitContext& ctx); +void EmitUnreachable(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex); +void EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); +void EmitSetSampleMask(EmitContext& ctx, std::string_view value); +void EmitSetFragDepth(EmitContext& ctx, std::string_view value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx); +void EmitLocalInvocationId(EmitContext& ctx); +void EmitInvocationId(EmitContext& ctx); +void EmitSampleId(EmitContext& ctx); +void EmitIsHelperInvocation(EmitContext& ctx); +void EmitYDirection(EmitContext& ctx); +void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset); +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); +void EmitUndefU1(EmitContext& ctx); +void EmitUndefU8(EmitContext& ctx); +void EmitUndefU16(EmitContext& ctx); +void EmitUndefU32(EmitContext& ctx); +void EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal64(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal128(EmitContext& ctx, std::string_view address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset); +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitBitCastU16F16(EmitContext& ctx); +void EmitBitCastU32F32(EmitContext& ctx, std::string_view value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +void EmitBitCastF32U32(EmitContext& ctx, std::string_view value); +void EmitBitCastF64U64(EmitContext& ctx); +void EmitPackUint2x32(EmitContext& ctx, std::string_view value); +void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value); +void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitPackHalf2x16(EmitContext& ctx, std::string_view value); +void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value); +void EmitPackDouble2x32(EmitContext& ctx, std::string_view value); +void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx, std::string_view value); +void EmitFPAbs32(EmitContext& ctx, std::string_view value); +void EmitFPAbs64(EmitContext& ctx, std::string_view value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPNeg16(EmitContext& ctx, std::string_view value); +void EmitFPNeg32(EmitContext& ctx, std::string_view value); +void EmitFPNeg64(EmitContext& ctx, std::string_view value); +void EmitFPSin(EmitContext& ctx, std::string_view value); +void EmitFPCos(EmitContext& ctx, std::string_view value); +void EmitFPExp2(EmitContext& ctx, std::string_view value); +void EmitFPLog2(EmitContext& ctx, std::string_view value); +void EmitFPRecip32(EmitContext& ctx, std::string_view value); +void EmitFPRecip64(EmitContext& ctx, std::string_view value); +void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value); +void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value); +void EmitFPSqrt(EmitContext& ctx, std::string_view value); +void EmitFPSaturate16(EmitContext& ctx, std::string_view value); +void EmitFPSaturate32(EmitContext& ctx, std::string_view value); +void EmitFPSaturate64(EmitContext& ctx, std::string_view value); +void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPRoundEven16(EmitContext& ctx, std::string_view value); +void EmitFPRoundEven32(EmitContext& ctx, std::string_view value); +void EmitFPRoundEven64(EmitContext& ctx, std::string_view value); +void EmitFPFloor16(EmitContext& ctx, std::string_view value); +void EmitFPFloor32(EmitContext& ctx, std::string_view value); +void EmitFPFloor64(EmitContext& ctx, std::string_view value); +void EmitFPCeil16(EmitContext& ctx, std::string_view value); +void EmitFPCeil32(EmitContext& ctx, std::string_view value); +void EmitFPCeil64(EmitContext& ctx, std::string_view value); +void EmitFPTrunc16(EmitContext& ctx, std::string_view value); +void EmitFPTrunc32(EmitContext& ctx, std::string_view value); +void EmitFPTrunc64(EmitContext& ctx, std::string_view value); +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPIsNan16(EmitContext& ctx, std::string_view value); +void EmitFPIsNan32(EmitContext& ctx, std::string_view value); +void EmitFPIsNan64(EmitContext& ctx, std::string_view value); +void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitINeg32(EmitContext& ctx, std::string_view value); +void EmitINeg64(EmitContext& ctx, std::string_view value); +void EmitIAbs32(EmitContext& ctx, std::string_view value); +void EmitIAbs64(EmitContext& ctx, std::string_view value); +void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, + std::string_view offset, std::string_view count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitReverse32(EmitContext& ctx, std::string_view value); +void EmitBitCount32(EmitContext& ctx, std::string_view value); +void EmitBitwiseNot32(EmitContext& ctx, std::string_view value); +void EmitFindSMsb32(EmitContext& ctx, std::string_view value); +void EmitFindUMsb32(EmitContext& ctx, std::string_view value); +void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitGlobalAtomicIAdd32(EmitContext& ctx); +void EmitGlobalAtomicSMin32(EmitContext& ctx); +void EmitGlobalAtomicUMin32(EmitContext& ctx); +void EmitGlobalAtomicSMax32(EmitContext& ctx); +void EmitGlobalAtomicUMax32(EmitContext& ctx); +void EmitGlobalAtomicInc32(EmitContext& ctx); +void EmitGlobalAtomicDec32(EmitContext& ctx); +void EmitGlobalAtomicAnd32(EmitContext& ctx); +void EmitGlobalAtomicOr32(EmitContext& ctx); +void EmitGlobalAtomicXor32(EmitContext& ctx); +void EmitGlobalAtomicExchange32(EmitContext& ctx); +void EmitGlobalAtomicIAdd64(EmitContext& ctx); +void EmitGlobalAtomicSMin64(EmitContext& ctx); +void EmitGlobalAtomicUMin64(EmitContext& ctx); +void EmitGlobalAtomicSMax64(EmitContext& ctx); +void EmitGlobalAtomicUMax64(EmitContext& ctx); +void EmitGlobalAtomicInc64(EmitContext& ctx); +void EmitGlobalAtomicDec64(EmitContext& ctx); +void EmitGlobalAtomicAnd64(EmitContext& ctx); +void EmitGlobalAtomicOr64(EmitContext& ctx); +void EmitGlobalAtomicXor64(EmitContext& ctx); +void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicAddF32(EmitContext& ctx); +void EmitGlobalAtomicAddF16x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32x2(EmitContext& ctx); +void EmitGlobalAtomicMinF16x2(EmitContext& ctx); +void EmitGlobalAtomicMinF32x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalNot(EmitContext& ctx, std::string_view value); +void EmitConvertS16F16(EmitContext& ctx, std::string_view value); +void EmitConvertS16F32(EmitContext& ctx, std::string_view value); +void EmitConvertS16F64(EmitContext& ctx, std::string_view value); +void EmitConvertS32F16(EmitContext& ctx, std::string_view value); +void EmitConvertS32F32(EmitContext& ctx, std::string_view value); +void EmitConvertS32F64(EmitContext& ctx, std::string_view value); +void EmitConvertS64F16(EmitContext& ctx, std::string_view value); +void EmitConvertS64F32(EmitContext& ctx, std::string_view value); +void EmitConvertS64F64(EmitContext& ctx, std::string_view value); +void EmitConvertU16F16(EmitContext& ctx, std::string_view value); +void EmitConvertU16F32(EmitContext& ctx, std::string_view value); +void EmitConvertU16F64(EmitContext& ctx, std::string_view value); +void EmitConvertU32F16(EmitContext& ctx, std::string_view value); +void EmitConvertU32F32(EmitContext& ctx, std::string_view value); +void EmitConvertU32F64(EmitContext& ctx, std::string_view value); +void EmitConvertU64F16(EmitContext& ctx, std::string_view value); +void EmitConvertU64F32(EmitContext& ctx, std::string_view value); +void EmitConvertU64F64(EmitContext& ctx, std::string_view value); +void EmitConvertU64U32(EmitContext& ctx, std::string_view value); +void EmitConvertU32U64(EmitContext& ctx, std::string_view value); +void EmitConvertF16F32(EmitContext& ctx, std::string_view value); +void EmitConvertF32F16(EmitContext& ctx, std::string_view value); +void EmitConvertF32F64(EmitContext& ctx, std::string_view value); +void EmitConvertF64F32(EmitContext& ctx, std::string_view value); +void EmitConvertF16S8(EmitContext& ctx, std::string_view value); +void EmitConvertF16S16(EmitContext& ctx, std::string_view value); +void EmitConvertF16S32(EmitContext& ctx, std::string_view value); +void EmitConvertF16S64(EmitContext& ctx, std::string_view value); +void EmitConvertF16U8(EmitContext& ctx, std::string_view value); +void EmitConvertF16U16(EmitContext& ctx, std::string_view value); +void EmitConvertF16U32(EmitContext& ctx, std::string_view value); +void EmitConvertF16U64(EmitContext& ctx, std::string_view value); +void EmitConvertF32S8(EmitContext& ctx, std::string_view value); +void EmitConvertF32S16(EmitContext& ctx, std::string_view value); +void EmitConvertF32S32(EmitContext& ctx, std::string_view value); +void EmitConvertF32S64(EmitContext& ctx, std::string_view value); +void EmitConvertF32U8(EmitContext& ctx, std::string_view value); +void EmitConvertF32U16(EmitContext& ctx, std::string_view value); +void EmitConvertF32U32(EmitContext& ctx, std::string_view value); +void EmitConvertF32U64(EmitContext& ctx, std::string_view value); +void EmitConvertF64S8(EmitContext& ctx, std::string_view value); +void EmitConvertF64S16(EmitContext& ctx, std::string_view value); +void EmitConvertF64S32(EmitContext& ctx, std::string_view value); +void EmitConvertF64S64(EmitContext& ctx, std::string_view value); +void EmitConvertF64U8(EmitContext& ctx, std::string_view value); +void EmitConvertF64U16(EmitContext& ctx, std::string_view value); +void EmitConvertF64U32(EmitContext& ctx, std::string_view value); +void EmitConvertF64U64(EmitContext& ctx, std::string_view value); +void EmitBindlessImageSampleImplicitLod(EmitContext&); +void EmitBindlessImageSampleExplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +void EmitBindlessImageGather(EmitContext&); +void EmitBindlessImageGatherDref(EmitContext&); +void EmitBindlessImageFetch(EmitContext&); +void EmitBindlessImageQueryDimensions(EmitContext&); +void EmitBindlessImageQueryLod(EmitContext&); +void EmitBindlessImageGradient(EmitContext&); +void EmitBindlessImageRead(EmitContext&); +void EmitBindlessImageWrite(EmitContext&); +void EmitBoundImageSampleImplicitLod(EmitContext&); +void EmitBoundImageSampleExplicitLod(EmitContext&); +void EmitBoundImageSampleDrefImplicitLod(EmitContext&); +void EmitBoundImageSampleDrefExplicitLod(EmitContext&); +void EmitBoundImageGather(EmitContext&); +void EmitBoundImageGatherDref(EmitContext&); +void EmitBoundImageFetch(EmitContext&); +void EmitBoundImageQueryDimensions(EmitContext&); +void EmitBoundImageQueryLod(EmitContext&); +void EmitBoundImageGradient(EmitContext&); +void EmitBoundImageRead(EmitContext&); +void EmitBoundImageWrite(EmitContext&); +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset); +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view lod); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords); +void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view derivates, std::string_view offset, + std::string_view lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view color); +void EmitBindlessImageAtomicIAdd32(EmitContext&); +void EmitBindlessImageAtomicSMin32(EmitContext&); +void EmitBindlessImageAtomicUMin32(EmitContext&); +void EmitBindlessImageAtomicSMax32(EmitContext&); +void EmitBindlessImageAtomicUMax32(EmitContext&); +void EmitBindlessImageAtomicInc32(EmitContext&); +void EmitBindlessImageAtomicDec32(EmitContext&); +void EmitBindlessImageAtomicAnd32(EmitContext&); +void EmitBindlessImageAtomicOr32(EmitContext&); +void EmitBindlessImageAtomicXor32(EmitContext&); +void EmitBindlessImageAtomicExchange32(EmitContext&); +void EmitBoundImageAtomicIAdd32(EmitContext&); +void EmitBoundImageAtomicSMin32(EmitContext&); +void EmitBoundImageAtomicUMin32(EmitContext&); +void EmitBoundImageAtomicSMax32(EmitContext&); +void EmitBoundImageAtomicUMax32(EmitContext&); +void EmitBoundImageAtomicInc32(EmitContext&); +void EmitBoundImageAtomicDec32(EmitContext&); +void EmitBoundImageAtomicAnd32(EmitContext&); +void EmitBoundImageAtomicOr32(EmitContext&); +void EmitBoundImageAtomicXor32(EmitContext&); +void EmitBoundImageAtomicExchange32(EmitContext&); +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitLaneId(EmitContext& ctx); +void EmitVoteAll(EmitContext& ctx, std::string_view pred); +void EmitVoteAny(EmitContext& ctx, std::string_view pred); +void EmitVoteEqual(EmitContext& ctx, std::string_view pred); +void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred); +void EmitSubgroupEqMask(EmitContext& ctx); +void EmitSubgroupLtMask(EmitContext& ctx); +void EmitSubgroupLeMask(EmitContext& ctx); +void EmitSubgroupGtMask(EmitContext& ctx); +void EmitSubgroupGeMask(EmitContext& ctx); +void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, + std::string_view swizzle); +void EmitDPdxFine(EmitContext& ctx, std::string_view op_a); +void EmitDPdyFine(EmitContext& ctx, std::string_view op_a); +void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a); +void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a); + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp new file mode 100644 index 000000000..8bd40458b --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -0,0 +1,2149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4100) +#endif + +namespace Shader::Backend::GLSL { + +static void NotImplemented() { + throw NotImplementedException("GLSL instruction"); +} + +void EmitPhi(EmitContext& ctx, IR::Inst* inst) { + NotImplemented(); +} + +void EmitVoid(EmitContext& ctx) { + NotImplemented(); +} + +void EmitIdentity(EmitContext& ctx, const IR::Value& value) { + const auto var{ctx.AllocVar()}; + switch (value.Type()) { + case IR::Type::U32: + ctx.Add("uint {}={};", var, value.U32()); + break; + default: + ctx.Add("EmitIdentity {}", value.Type()); + break; + } +} + +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + NotImplemented(); +} + +void EmitReference(EmitContext&) { + NotImplemented(); +} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) { + NotImplemented(); +} + +void EmitBranch(EmitContext& ctx, std::string_view label) { + NotImplemented(); +} + +void EmitBranchConditional(EmitContext& ctx, std::string_view condition, + std::string_view true_label, std::string_view false_label) { + NotImplemented(); +} + +void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, + std::string_view continue_label) { + NotImplemented(); +} + +void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label) { + NotImplemented(); +} + +void EmitReturn(EmitContext& ctx) { + NotImplemented(); +} + +void EmitJoin(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUnreachable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label) { + NotImplemented(); +} + +void EmitBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void EmitPrologue(EmitContext& ctx) { + // NotImplemented(); +} + +void EmitEpilogue(EmitContext& ctx) { + // NotImplemented(); +} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + NotImplemented(); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + NotImplemented(); +} + +void EmitGetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex) { + NotImplemented(); +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex) { + NotImplemented(); +} + +void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex) { + NotImplemented(); +} + +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex) { + NotImplemented(); +} + +void EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + NotImplemented(); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { + NotImplemented(); +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { + NotImplemented(); +} + +void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitGetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWorkgroupId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLocalInvocationId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitInvocationId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSampleId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitIsHelperInvocation(EmitContext& ctx) { + NotImplemented(); +} + +void EmitYDirection(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset) { + NotImplemented(); +} + +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { + NotImplemented(); +} + +void EmitUndefU1(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalS8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalS16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobal32(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void EmitLoadGlobal64(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void EmitLoadGlobal128(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void EmitWriteGlobalU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalS8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalS16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { + NotImplemented(); +} + +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { + NotImplemented(); +} + +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { + NotImplemented(); +} + +void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + // ctx.Add("c{}[{}]={}", binding.U32() + 2, offset.U32(), 16); + ctx.Add("EmitWriteStorage32"); + // ctx.Add("c{}[{}]={}", binding.U32(), offset.U32(), value); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF64x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x3(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x4(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x3(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x4(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitBitCastU16F16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitBitCastU32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCastU64F64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitBitCastF16U16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitBitCastF32U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCastF64U64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitPackUint2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackFloat2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackHalf2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackDouble2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitGetZeroFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSignFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCarryFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOverflowFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSparseFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetInBoundsFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitFPAbs16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPNeg16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSin(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCos(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPExp2(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPLog2(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecip32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecip64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSqrt(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void EmitFPRoundEven16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPIsNan16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitINeg32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitINeg64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAbs32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAbs64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitReverse32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCount32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitwiseNot32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFindSMsb32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFindUMsb32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max) { + NotImplemented(); +} + +void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max) { + NotImplemented(); +} + +void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitGlobalAtomicIAdd32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMin32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMin32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMax32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMax32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicInc32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicDec32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAnd32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicOr32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicXor32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicExchange32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicIAdd64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMin64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMin64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMax64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMax64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicInc64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicDec64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAnd64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicOr64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicXor64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicExchange64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMinF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMinF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalNot(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { + NotImplemented(); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref) { + NotImplemented(); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms) { + NotImplemented(); +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view lod) { + NotImplemented(); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords) { + NotImplemented(); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view derivates, std::string_view offset, + std::string_view lod_clamp) { + NotImplemented(); +} + +void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords) { + NotImplemented(); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view color) { + NotImplemented(); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitLaneId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitVoteAll(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitVoteAny(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitVoteEqual(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitSubgroupEqMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupLtMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupLeMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupGtMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupGeMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { + NotImplemented(); +} + +void EmitDPdxFine(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdyFine(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp new file mode 100644 index 000000000..591a87988 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -0,0 +1,96 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include + +#include "shader_recompiler/backend/glsl/reg_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr std::string_view SWIZZLE = "xyzw"; + +std::string Representation(Id id) { + if (id.is_condition_code != 0) { + throw NotImplementedException("Condition code"); + } + if (id.is_spill != 0) { + throw NotImplementedException("Spilling"); + } + const u32 num_elements{id.num_elements_minus_one + 1}; + const u32 index{static_cast(id.index)}; + if (num_elements == 4) { + return fmt::format("R{}", index); + } else { + return fmt::format("R{}.{}", index, SWIZZLE.substr(id.base_element, num_elements)); + } +} + +std::string MakeImm(const IR::Value& value) { + switch (value.Type()) { + case IR::Type::U1: + return fmt::format("{}", value.U1() ? "true" : "false"); + case IR::Type::U32: + return fmt::format("{}", value.U32()); + case IR::Type::F32: + return fmt::format("{}", value.F32()); + case IR::Type::U64: + return fmt::format("{}", value.U64()); + case IR::Type::F64: + return fmt::format("{}", value.F64()); + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} +} // Anonymous namespace + +std::string RegAlloc::Define(IR::Inst& inst, u32 num_elements, u32 alignment) { + const Id id{Alloc(num_elements, alignment)}; + inst.SetDefinition(id); + return Representation(id); +} + +std::string RegAlloc::Consume(const IR::Value& value) { + return value.IsImmediate() ? MakeImm(value) : Consume(*value.Inst()); +} + +std::string RegAlloc::Consume(IR::Inst& inst) { + const Id id{inst.Definition()}; + inst.DestructiveRemoveUsage(); + if (!inst.HasUses()) { + Free(id); + } + return Representation(inst.Definition()); +} + +Id RegAlloc::Alloc(u32 num_elements, [[maybe_unused]] u32 alignment) { + for (size_t reg = 0; reg < NUM_REGS; ++reg) { + if (register_use[reg]) { + continue; + } + num_used_registers = std::max(num_used_registers, reg + 1); + register_use[reg] = true; + return Id{ + .base_element = 0, + .num_elements_minus_one = num_elements - 1, + .index = static_cast(reg), + .is_spill = 0, + .is_condition_code = 0, + }; + } + throw NotImplementedException("Register spilling"); +} + +void RegAlloc::Free(Id id) { + if (id.is_spill != 0) { + throw NotImplementedException("Free spill"); + } + register_use[id.index] = false; +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h new file mode 100644 index 000000000..850a93d6a --- /dev/null +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" + +namespace Shader::IR { +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +struct Id { + u32 base_element : 2; + u32 num_elements_minus_one : 2; + u32 index : 26; + u32 is_spill : 1; + u32 is_condition_code : 1; +}; + +class RegAlloc { +public: + std::string Define(IR::Inst& inst, u32 num_elements = 1, u32 alignment = 1); + + std::string Consume(const IR::Value& value); + +private: + static constexpr size_t NUM_REGS = 4096; + static constexpr size_t NUM_ELEMENTS = 4; + + std::string Consume(IR::Inst& inst); + + Id Alloc(u32 num_elements, u32 alignment); + + void Free(Id id); + + size_t num_used_registers{}; + std::bitset register_use{}; +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9391a4cd9..4387532ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -19,6 +19,7 @@ #include "core/core.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -40,6 +41,7 @@ namespace OpenGL { namespace { using Shader::Backend::GLASM::EmitGLASM; +using Shader::Backend::GLSL::EmitGLSL; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; @@ -435,7 +437,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; + const auto code{EmitGLSL(profile, program, binding)}; + OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } } @@ -489,7 +492,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& const std::string code{EmitGLASM(profile, info, program)}; asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); } else { - const std::vector code{EmitSPIRV(profile, program)}; + const auto code{EmitGLSL(profile, program)}; source_program.handle = glCreateProgram(); AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); -- cgit v1.2.3 From 53667ddd4ebdaa98f9c40ef3aee8efbdb15a0a6f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 10 Jul 2021 17:57:35 -0300 Subject: glsl: Fixup build issues --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4387532ab..602cf025b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -437,7 +437,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const auto code{EmitGLSL(profile, program, binding)}; + const auto code{EmitGLSL(profile, runtime_info, program, binding)}; OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } -- cgit v1.2.3 From bd24fa97138ff1e33a7f8d3c30a4f4482a6482a8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 19:55:40 -0400 Subject: glsl: Query GL Device for FP16 extension support --- src/shader_recompiler/backend/glsl/emit_context.cpp | 9 +++++++-- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_device.cpp | 2 ++ src/video_core/renderer_opengl/gl_device.h | 10 ++++++++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ 5 files changed, 23 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 9c3fd44ba..6f769fa10 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -5,6 +5,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { @@ -40,8 +41,12 @@ void EmitContext::SetupExtensions(std::string& header) { header += "#extension NV_shader_atomic_fp16_vector : enable\n"; } if (info.uses_fp16) { - // TODO: AMD - header += "#extension GL_NV_gpu_shader5 : enable\n"; + if (profile.support_gl_nv_gpu_shader_5) { + header += "#extension GL_NV_gpu_shader5 : enable\n"; + } + if (profile.support_gl_amd_gpu_shader_half_float) { + header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; + } } } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3109fb69c..5d269368a 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -83,6 +83,8 @@ struct Profile { bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; bool support_derivative_control{}; + bool support_gl_nv_gpu_shader_5{}; + bool support_gl_amd_gpu_shader_half_float{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3f7929f9e..071133781 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -158,6 +158,8 @@ Device::Device() { has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); + has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; + has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 1ffd24883..9b9402c29 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -120,6 +120,14 @@ public: return has_depth_buffer_float; } + bool HasNvGpuShader5() const { + return has_nv_gpu_shader_5; + } + + bool HasAmdShaderHalfFloat() const { + return has_amd_shader_half_float; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -151,6 +159,8 @@ private: bool use_asynchronous_shaders{}; bool use_driver_cache{}; bool has_depth_buffer_float{}; + bool has_nv_gpu_shader_5{}; + bool has_amd_shader_half_float{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 602cf025b..e00d01e34 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -217,6 +217,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), + .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), + .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .warp_size_potentially_larger_than_guest = true, -- cgit v1.2.3 From 3d086e6130a2c5f0546ccef3b234c65ef2f0c99b Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 26 May 2021 00:16:20 -0400 Subject: glsl: Implement some attribute getters and setters --- .../backend/glsl/emit_context.cpp | 52 +++++- src/shader_recompiler/backend/glsl/emit_context.h | 9 + .../backend/glsl/emit_glsl_composite.cpp | 14 +- .../backend/glsl/emit_glsl_context_get_set.cpp | 53 ++++++ .../backend/glsl/emit_glsl_image.cpp | 205 +++++++++++++++++++++ .../backend/glsl/emit_glsl_instructions.h | 9 +- .../backend/glsl/emit_glsl_not_implemented.cpp | 178 +----------------- src/shader_recompiler/backend/glsl/reg_alloc.cpp | 6 + src/shader_recompiler/backend/glsl/reg_alloc.h | 2 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - 10 files changed, 337 insertions(+), 192 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 7b6c6d22b..8e5983909 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -14,17 +14,63 @@ EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindin : info{program.info}, profile{profile_} { std::string header = "#version 450\n"; SetupExtensions(header); - if (program.stage == Stage::Compute) { + stage = program.stage; + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + stage_name = "vertex"; + attrib_name = "vertex"; + // TODO: add only what's used by the shader + header += + "out gl_PerVertex {vec4 gl_Position;float gl_PointSize;float gl_ClipDistance[];};"; + break; + case Stage::TessellationControl: + case Stage::TessellationEval: + stage_name = "primitive"; + attrib_name = "primitive"; + break; + case Stage::Geometry: + stage_name = "primitive"; + attrib_name = "vertex"; + break; + case Stage::Fragment: + stage_name = "fragment"; + attrib_name = "fragment"; + break; + case Stage::Compute: + stage_name = "invocation"; header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;\n", program.workgroup_size[0], program.workgroup_size[1], program.workgroup_size[2]); + break; } code += header; - + const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; + for (size_t index = 0; index < info.input_generics.size(); ++index) { + const auto& generic{info.input_generics[index]}; + if (generic.used) { + Add("layout(location={})in vec4 in_attr{};", index, index); + } + } + for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { + if (!info.stores_frag_color[index]) { + continue; + } + Add("layout(location={})out vec4 frag_color{};", index, index); + } + for (size_t index = 0; index < info.stores_generics.size(); ++index) { + if (info.stores_generics[index]) { + Add("layout(location={}) out vec4 out_attr{};", index, index); + } + } DefineConstantBuffers(); DefineStorageBuffers(); DefineHelperFunctions(); - code += "void main(){\n"; + Add("void main(){{"); + + if (stage == Stage::VertexA || stage == Stage::VertexB) { + Add("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); + } } void EmitContext::SetupExtensions(std::string& header) { diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 7f8857fa7..087eaff6a 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -88,6 +88,11 @@ public: Add(format_str, inst, args...); } + template + void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + template void Add(const char* format_str, Args&&... args) { code += fmt::format(format_str, std::forward(args)...); @@ -100,6 +105,10 @@ public: const Info& info; const Profile& profile; + Stage stage{}; + std::string_view stage_name = "invalid"; + std::string_view attrib_name = "invalid"; + private: void SetupExtensions(std::string& header); void DefineConstantBuffers(); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 8e7ad68bd..048b12f38 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -155,16 +155,14 @@ void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_vie ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeExtractF32x3([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeExtractF32x4([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } void EmitCompositeInsertF32x2([[maybe_unused]] EmitContext& ctx, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 7c9cadd7e..441818c0b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -51,4 +51,57 @@ void EmitGetCbufU32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); } + +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + [[maybe_unused]] std::string_view vertex) { + const u32 element{static_cast(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + ctx.AddF32("{}=in_attr{}.{};", inst, index, swizzle); + return; + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.AddF32("{}=gl_Position.{};", inst, swizzle); + break; + default: + fmt::print("Get attribute {}", attr); + throw NotImplementedException("Get attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + [[maybe_unused]] std::string_view vertex) { + const u32 element{static_cast(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + ctx.Add("out_attr{}.{}={};", index, swizzle, value); + return; + } + switch (attr) { + case IR::Attribute::PointSize: + ctx.Add("gl_Pointsize={};", value); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("gl_Position.{}={};", swizzle, value); + break; + default: + fmt::print("Set attribute {}", attr); + throw NotImplementedException("Set attribute {}", attr); + } +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { + const char swizzle{"xyzw"[component]}; + ctx.Add("frag_color{}.{}={};", index, swizzle, value); +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index e69de29bb..109938e0e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -0,0 +1,205 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { + +void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view bias_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view lod_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view dref, + [[maybe_unused]] std::string_view bias_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view dref, + [[maybe_unused]] std::string_view lod_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] const IR::Value& offset2) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] const IR::Value& offset2, + [[maybe_unused]] std::string_view dref) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view lod, + [[maybe_unused]] std::string_view ms) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view lod) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view derivates, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view lod_clamp) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageRead([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view color) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageGather(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageFetch(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageGradient(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageRead(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageWrite(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageGather(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageGatherDref(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageFetch(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageQueryLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageGradient(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageRead(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageWrite(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 9f32070b0..49ab108bb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -61,7 +61,8 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, std::string_view vertex); void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); @@ -180,8 +181,10 @@ void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::str std::string_view e3, std::string_view e4); void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, u32 index); -void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index b37b3c76d..14a2edd74 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -83,7 +83,7 @@ void EmitUnreachable(EmitContext& ctx) { } void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label) { - NotImplemented(); + ctx.Add("discard;"); } void EmitBarrier(EmitContext& ctx) { @@ -146,15 +146,6 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } -void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex) { - NotImplemented(); -} - -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, - std::string_view vertex) { - NotImplemented(); -} - void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex) { NotImplemented(); } @@ -172,10 +163,6 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { NotImplemented(); } -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { - NotImplemented(); -} - void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { NotImplemented(); } @@ -456,169 +443,6 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offse NotImplemented(); } -void EmitBindlessImageSampleImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageSampleExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGather(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGatherDref(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageFetch(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageQueryDimensions(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageQueryLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGradient(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageRead(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageWrite(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageGather(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageGatherDref(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageFetch(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageQueryDimensions(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageQueryLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageGradient(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageRead(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageWrite(EmitContext&) { - NotImplemented(); -} - -void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view bias_lc, - const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view lod_lc, - const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view bias_lc, const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view lod_lc, const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { - NotImplemented(); -} - -void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2, - std::string_view dref) { - NotImplemented(); -} - -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view offset, std::string_view lod, - std::string_view ms) { - NotImplemented(); -} - -void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod) { - NotImplemented(); -} - -void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords) { - NotImplemented(); -} - -void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view derivates, std::string_view offset, - std::string_view lod_clamp) { - NotImplemented(); -} - -void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords) { - NotImplemented(); -} - -void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view color) { - NotImplemented(); -} - void EmitBindlessImageAtomicIAdd32(EmitContext&) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index c60a87d91..a080d5341 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -55,6 +55,8 @@ std::string MakeImm(const IR::Value& value) { return fmt::format("{}ul", value.U64()); case IR::Type::F64: return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); + case IR::Type::Void: + return ""; default: throw NotImplementedException("Immediate type {}", value.Type()); } @@ -131,6 +133,10 @@ std::string RegAlloc::GetType(Type type, u32 index) { return "uvec2 "; case Type::F32x2: return "vec2 "; + case Type::U32x4: + return "uvec4 "; + case Type::F32x4: + return "vec4 "; case Type::Void: return ""; default: diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index 419e1e761..df067d3ad 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -27,6 +27,8 @@ enum class Type : u32 { F64, U32x2, F32x2, + U32x4, + F32x4, Void, }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e00d01e34..8a052851b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -440,7 +440,6 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const auto code{EmitGLSL(profile, runtime_info, program, binding)}; - OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } } -- cgit v1.2.3 From e35ffbbeb0f85f676416fcb8f0bb0207671f379d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 00:53:26 -0400 Subject: glsl: Implement VOTE for subgroup size potentially larger --- .../backend/glsl/emit_context.cpp | 12 ++++-- .../backend/glsl/emit_glsl_warp.cpp | 43 ++++++++++++++-------- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 5 files changed, 43 insertions(+), 20 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 5456d4e5b..c6325e55f 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -122,9 +122,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_separate_shader_objects : enable\n"; - header += "#extension GL_ARB_sparse_texture2 : enable\n"; - header += "#extension GL_EXT_texture_shadow_lod : enable\n"; - // header += "#extension GL_ARB_texture_cube_map_array : enable\n"; + if (stage != Stage::Compute) { + // TODO: track this usage + header += "#extension GL_ARB_sparse_texture2 : enable\n"; + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; + } if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } @@ -149,6 +151,10 @@ void EmitContext::SetupExtensions(std::string&) { info.uses_subgroup_shuffles || info.uses_fswzadd) { header += "#extension GL_ARB_shader_ballot : enable\n"; header += "#extension GL_ARB_shader_group_vote : enable\n"; + header += "#extension GL_KHR_shader_subgroup_basic : enable\n"; + if (!info.uses_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index e462c977c..8a018acb5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -42,31 +42,42 @@ void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in } void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); - // TODO: - // if (ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); + } } void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - ctx.AddU1("{}=anyInvocationARB({});", inst, pred); - // TODO: - // if (ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=anyInvocationARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); + } } void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); - // TODO: - // if (ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + const auto value{fmt::format("({}^{})", ballot, active_mask)}; + ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); + } } void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); - // TODO: - // if (ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); + } else { + ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubgroupInvocationID];", inst, pred); + } } void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 071133781..20ea42cff 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; + warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 9b9402c29..ff0ff2b08 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -128,6 +128,10 @@ public: return has_amd_shader_half_float; } + bool IsWarpSizePotentiallyLargerThanGuest() const { + return warp_size_potentially_larger_than_guest; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -161,6 +165,7 @@ private: bool has_depth_buffer_float{}; bool has_nv_gpu_shader_5{}; bool has_amd_shader_half_float{}; + bool warp_size_potentially_larger_than_guest{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 8a052851b..cd11ff653 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -220,7 +220,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), - .warp_size_potentially_larger_than_guest = true, + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), .lower_left_origin_mode = true, .need_declared_frag_colors = true, -- cgit v1.2.3 From f4799e8fa15b92d8d5607dc5dfca4974901ee06c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 2 Jun 2021 00:33:03 -0400 Subject: glsl: Implement transform feedback --- .../backend/glsl/emit_context.cpp | 53 ++++++++++++++++++---- src/shader_recompiler/backend/glsl/emit_context.h | 8 ++++ .../backend/glsl/emit_glsl_context_get_set.cpp | 15 ++++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 18 ++++++-- 4 files changed, 76 insertions(+), 18 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 6f10002fe..58355d5e3 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -37,7 +37,6 @@ bool StoresPerVertexAttributes(Stage stage) { case Stage::VertexA: case Stage::VertexB: case Stage::Geometry: - case Stage::TessellationControl: case Stage::TessellationEval: return true; default: @@ -154,9 +153,7 @@ void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { return; } header += "out gl_PerVertex{"; - if (info.stores_position) { - header += "vec4 gl_Position;"; - } + header += "vec4 gl_Position;"; if (info.stores_point_size) { header += "float gl_PointSize;"; } @@ -236,10 +233,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } for (size_t index = 0; index < info.stores_generics.size(); ++index) { // TODO: Properly resolve attribute issues - const auto declaration{fmt::format("layout(location={}) out vec4 out_attr{}{};", index, - index, OutputDecorator(stage, program.invocations))}; if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { - header += declaration; + DefineGenericOutput(index, program.invocations); } } header += "\n"; @@ -312,13 +307,53 @@ void EmitContext::DefineStorageBuffers(Bindings& bindings) { } } +void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_index{static_cast(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + std::string definition{fmt::format("layout(location={}", index)}; + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + if (!runtime_info.xfb_varyings.empty()) { + xfb_varying = &runtime_info.xfb_varyings[base_index + element]; + xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; + if (element > 0) { + definition += fmt::format(",component={}", element); + } + if (xfb_varying) { + definition += + fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer, + xfb_varying->stride, xfb_varying->offset); + } + std::string name{fmt::format("out_attr{}", index)}; + if (num_components < 4 || element > 0) { + name += fmt::format("_{}", swizzle.substr(element, num_components)); + } + const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)}; + definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations)); + header += definition; + + const GenericElementInfo element_info{ + .name = name, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(output_generics[index].begin() + element, num_components, element_info); + element += num_components; + } + header += "\n"; +} + void EmitContext::DefineHelperFunctions() { if (info.uses_global_increment || info.uses_shared_increment) { header += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n"; } if (info.uses_global_decrement || info.uses_shared_decrement) { - header += - "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; + header += "uint CasDecrement(uint op_a,uint " + "op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; } if (info.uses_atomic_f32_add) { header += "uint CasFloatAdd(uint op_a,float op_b){return " diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 48786a2c7..5d48675e6 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -30,6 +30,12 @@ struct Program; namespace Shader::Backend::GLSL { +struct GenericElementInfo { + std::string name{}; + u32 first_element{}; + u32 num_components{}; +}; + class EmitContext { public: explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, @@ -149,6 +155,7 @@ public: std::vector image_buffer_bindings; std::vector texture_bindings; std::vector image_bindings; + std::array, 32> output_generics{}; bool uses_y_direction{}; bool uses_cc_carry{}; @@ -157,6 +164,7 @@ private: void SetupExtensions(std::string& header); void DefineConstantBuffers(Bindings& bindings); void DefineStorageBuffers(Bindings& bindings); + void DefineGenericOutput(size_t index, u32 invocations); void DefineHelperFunctions(); void SetupImages(Bindings& bindings); }; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 0cf31329d..c48492a17 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -200,13 +200,21 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, [[maybe_unused]] std::string_view vertex) { - const u32 element{static_cast(attr) % 4}; - const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - ctx.Add("out_attr{}{}.{}={};", index, OutputVertexIndex(ctx, vertex), swizzle, value); + const u32 element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; + const auto output_decorator{OutputVertexIndex(ctx, vertex)}; + if (info.num_components == 1) { + ctx.Add("{}{}={};", info.name, output_decorator, value); + } else { + const u32 index_element{element - info.first_element}; + ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value); + } return; } + const u32 element{static_cast(attr) % 4}; + const char swizzle{"xyzw"[element]}; switch (attr) { case IR::Attribute::PointSize: ctx.Add("gl_PointSize={};", value); @@ -233,7 +241,6 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val break; } default: - fmt::print("Set attribute {}", attr); throw NotImplementedException("Set attribute {}", attr); } } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cd11ff653..0a1ba363b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -92,9 +92,15 @@ GLenum AssemblyStage(size_t stage_index) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, - bool glasm_use_storage_buffers) { + bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; switch (program.stage) { + case Shader::Stage::VertexB: + case Shader::Stage::Geometry: + if (!use_assembly_shaders && key.xfb_enabled != 0) { + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + } + break; case Shader::Stage::TessellationEval: info.tess_clockwise = key.tessellation_clockwise != 0; info.tess_primitive = [&key] { @@ -420,7 +426,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array assembly_programs; Shader::Backend::Bindings binding; - if (!device.UseAssemblyShaders()) { + const bool use_glasm{device.UseAssemblyShaders()}; + if (!use_glasm) { source_program.handle = glCreateProgram(); } const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; @@ -434,8 +441,9 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)}; - if (device.UseAssemblyShaders()) { + const auto runtime_info{ + MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; + if (use_glasm) { const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { @@ -443,7 +451,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( AttachShader(Stage(stage_index), source_program.handle, code); } } - if (!device.UseAssemblyShaders()) { + if (!use_glasm) { LinkProgram(source_program.handle); } return std::make_unique( -- cgit v1.2.3 From 6577a63d368afa57d5f29df40e524af30eaabffa Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 2 Jun 2021 00:48:49 -0400 Subject: glsl: skip gl_ViewportIndex write if device does not support it --- src/shader_recompiler/backend/glsl/emit_context.cpp | 18 ++++++++++-------- src/shader_recompiler/backend/glsl/emit_context.h | 1 + .../backend/glsl/emit_glsl_context_get_set.cpp | 5 +++++ src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 5 files changed, 18 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 58355d5e3..846d38bfc 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -148,23 +148,24 @@ std::string_view OutputPrimitive(OutputTopology topology) { throw InvalidArgument("Invalid output topology {}", topology); } -void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { - if (!StoresPerVertexAttributes(stage)) { +void SetupOutPerVertex(EmitContext& ctx, std::string& header) { + if (!StoresPerVertexAttributes(ctx.stage)) { return; } header += "out gl_PerVertex{"; header += "vec4 gl_Position;"; - if (info.stores_point_size) { + if (ctx.info.stores_point_size) { header += "float gl_PointSize;"; } - if (info.stores_clip_distance) { + if (ctx.info.stores_clip_distance) { header += "float gl_ClipDistance[];"; } - if (info.stores_viewport_index && stage != Stage::Geometry) { + if (ctx.info.stores_viewport_index && ctx.supports_viewport_layer && + ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } header += "};\n"; - if (info.stores_viewport_index && stage == Stage::Geometry) { + if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } @@ -173,6 +174,7 @@ void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { + supports_viewport_layer = profile.support_gl_vertex_viewport_layer; SetupExtensions(header); stage = program.stage; switch (program.stage) { @@ -206,7 +208,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile program.workgroup_size[2]); break; } - SetupOutPerVertex(stage, info, header); + SetupOutPerVertex(*this, header); for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; if (generic.used) { @@ -276,7 +278,7 @@ void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } } - if (info.stores_viewport_index && stage != Stage::Geometry) { + if (info.stores_viewport_index && supports_viewport_layer && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } } diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 5d48675e6..26a76f8a3 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -159,6 +159,7 @@ public: bool uses_y_direction{}; bool uses_cc_carry{}; + bool supports_viewport_layer{}; private: void SetupExtensions(std::string& header); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index c48492a17..ebaf50abd 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -226,6 +226,11 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val ctx.Add("gl_Position.{}={};", swizzle, value); break; case IR::Attribute::ViewportIndex: + if (ctx.stage != Stage::Geometry && !ctx.supports_viewport_layer) { + // LOG_WARNING(..., "Shader stores viewport index but device does not support viewport + // layer extension"); + break; + } ctx.Add("gl_ViewportIndex=floatBitsToInt({});", value); break; case IR::Attribute::ClipDistance0: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 5d269368a..420117132 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -85,6 +85,7 @@ struct Profile { bool support_derivative_control{}; bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; + bool support_gl_vertex_viewport_layer{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0a1ba363b..77681594a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -225,6 +225,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_derivative_control = device.HasDerivativeControl(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), + .support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 747b8556a4611791c1b0afbb500c77de57adfc54 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 4 Jun 2021 00:46:46 -0400 Subject: glsl: Use textureGrad fallback when EXT_texture_shadow_lod is unsupported --- .../backend/glsl/emit_context.cpp | 4 +- .../backend/glsl/emit_glsl_image.cpp | 44 ++++++++++++++++++---- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 4 files changed, 42 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index ecc7335ba..76cf0bdf0 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -282,8 +282,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { // TODO: track this usage header += "#extension GL_ARB_sparse_texture2 : enable\n" - "#extension GL_EXT_texture_shadow_lod : enable\n" "#extension GL_EXT_shader_image_load_formatted : enable\n"; + if (profile.support_gl_texture_shadow_lod) { + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; + } if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index a62e2b181..6cf0300ab 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { @@ -67,14 +68,14 @@ std::string TexelFetchCastToInt(std::string_view value, const IR::TextureInstInf } } -std::string ShadowSamplerVecCast(TextureType type) { +bool NeedsShadowLodExt(TextureType type) { switch (type) { case TextureType::ColorArray2D: case TextureType::ColorCube: case TextureType::ColorArrayCube: - return "vec4"; + return true; default: - return "vec3"; + return false; } } @@ -221,7 +222,22 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, } const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; - const auto cast{ShadowSamplerVecCast(info.type)}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && + ctx.stage != Stage::Fragment && needs_shadow_ext}; + if (use_grad) { + // LOG_WARNING(..., "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + // LOG_WARNING(..., "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } if (!offset.IsEmpty()) { const auto offset_str{GetOffsetVec(ctx, offset)}; if (ctx.stage == Stage::Fragment) { @@ -263,15 +279,29 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; - const auto cast{ShadowSamplerVecCast(info.type)}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + if (use_grad) { + // LOG_WARNING(..., "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + // LOG_WARNING(..., "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } if (!offset.IsEmpty()) { const auto offset_str{GetOffsetVec(ctx, offset)}; if (info.type == TextureType::ColorArrayCube) { ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc, offset_str); } else { - ctx.AddF32("{}=textureLodOffset({},vec3({},{}),{},{});", inst, texture, coords, dref, - lod_lc, offset_str); + ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords, + dref, lod_lc, offset_str); } } else { if (info.type == TextureType::ColorArrayCube) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 420117132..3bbd5a531 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -86,6 +86,7 @@ struct Profile { bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_vertex_viewport_layer{}; + bool support_gl_texture_shadow_lod{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 77681594a..b4c634d29 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -226,6 +226,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), + .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 970fc39d986c5eefa1c4b61ac89ef7e8c2bf23bf Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 7 Jun 2021 19:05:11 -0400 Subject: glsl: Rebase fixes --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - src/video_core/renderer_opengl/gl_shader_util.cpp | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b4c634d29..3d229a78c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -225,7 +225,6 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_derivative_control = device.HasDerivativeControl(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), - .support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 99cb81819..ac6f33e34 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,7 +13,7 @@ namespace OpenGL { -static void LogShader(GLuint shader) { +static void LogShader(GLuint shader, std::optional code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); if (shader_status == GL_FALSE) { @@ -28,6 +28,9 @@ static void LogShader(GLuint shader) { glGetShaderInfoLog(shader, log_length, nullptr, log.data()); if (shader_status == GL_FALSE) { LOG_ERROR(Render_OpenGL, "{}", log); + if (code.has_value()) { + LOG_INFO(Render_OpenGL, "\n{}", *code); + } } else { LOG_WARNING(Render_OpenGL, "{}", log); } @@ -43,7 +46,7 @@ void AttachShader(GLenum stage, GLuint program, std::string_view code) { glCompileShader(shader.handle); glAttachShader(program, shader.handle); if (Settings::values.renderer_debug) { - LogShader(shader.handle); + LogShader(shader.handle, code); } } -- cgit v1.2.3 From 8bb8bbf4ae2ef259857efe49436dfd71758ea092 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 8 Jun 2021 01:55:12 -0400 Subject: glsl: Implement fswzadd and wip nv thread shuffle impl --- .../backend/glsl/emit_context.cpp | 11 +++++++ src/shader_recompiler/backend/glsl/emit_context.h | 1 + .../backend/glsl/emit_glsl_warp.cpp | 36 +++++++++++++++++++--- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 5 files changed, 45 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index ed0955da0..6c2828644 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -306,6 +306,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile SetupImages(bindings); SetupTextures(bindings); DefineHelperFunctions(); + DefineConstants(); } void EmitContext::SetupExtensions(std::string&) { @@ -339,6 +340,9 @@ void EmitContext::SetupExtensions(std::string&) { if (!info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } + if (profile.support_gl_warp_intrinsics) { + header += "#extension GL_NV_shader_thread_shuffle : enable\n"; + } } if (info.stores_viewport_index && profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { @@ -605,4 +609,11 @@ void EmitContext::SetupTextures(Bindings& bindings) { } } +void EmitContext::DefineConstants() { + if (info.uses_fswzadd) { + header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);" + "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);"; + } +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index dce99586e..2b0d22ce5 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -167,6 +167,7 @@ private: void DefineStorageBuffers(Bindings& bindings); void DefineGenericOutput(size_t index, u32 invocations); void DefineHelperFunctions(); + void DefineConstants(); std::string DefineGlobalMemoryFunctions(); void SetupImages(Bindings& bindings); void SetupTextures(Bindings& bindings); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 38c49b164..6ced0776c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -35,9 +35,17 @@ std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); } + +void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, + std::string_view value, std::string_view index, + [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) { + const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)}; + ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); + SetInBoundsFlag(ctx, inst); +} } // namespace -void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { +void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); } @@ -103,6 +111,10 @@ void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); + return; + } const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; const auto thread_id{"gl_SubGroupInvocationARB"}; const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; @@ -117,6 +129,10 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); + return; + } const auto thread_id{"gl_SubGroupInvocationARB"}; const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; @@ -128,6 +144,10 @@ void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); + return; + } const auto thread_id{"gl_SubGroupInvocationARB"}; const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; @@ -139,6 +159,10 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); + return; + } const auto thread_id{"gl_SubGroupInvocationARB"}; const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; @@ -147,10 +171,12 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); } -void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view op_a, [[maybe_unused]] std::string_view op_b, - [[maybe_unused]] std::string_view swizzle) { - NotImplemented(); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { + const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)}; + const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask); + const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask); + ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b); } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3a4495070..246995190 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -86,6 +86,7 @@ struct Profile { bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; + bool support_gl_warp_intrinsics{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3d229a78c..4fcf4e458 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -226,6 +226,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), + .support_gl_warp_intrinsics = false, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 6650c4799d42044f087a1ac5cb5e4b1a9e899000 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 8 Jun 2021 23:52:28 -0400 Subject: gl_rasterizer: Add texture fetch barrier for fragments Fixes flicker seen in XC2 --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 54696d97d..7513bd071 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -437,7 +437,7 @@ void RasterizerOpenGL::WaitForIdle() { } void RasterizerOpenGL::FragmentBarrier() { - glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); + glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); } void RasterizerOpenGL::TiledCacheBarrier() { -- cgit v1.2.3 From e81c73a8748ccfcde56acfee5630116c3950e479 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 11 Jun 2021 02:50:30 -0400 Subject: glsl: Address more feedback. Implement indexed texture reads --- .../backend/glsl/emit_context.cpp | 48 +++----- src/shader_recompiler/backend/glsl/emit_context.h | 13 +- .../backend/glsl/emit_glsl_image.cpp | 136 ++++++++++----------- .../backend/glsl/emit_glsl_select.cpp | 4 +- .../backend/glsl/emit_glsl_shared_memory.cpp | 19 ++- src/video_core/renderer_opengl/gl_shader_util.cpp | 6 +- 6 files changed, 112 insertions(+), 114 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index cbcf0a1eb..ed10eca8a 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -559,53 +559,45 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { } void EmitContext::SetupImages(Bindings& bindings) { - image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); + image_buffers.reserve(info.image_buffer_descriptors.size()); for (const auto& desc : info.image_buffer_descriptors) { - image_buffer_bindings.push_back(bindings.image); - const auto indices{bindings.image + desc.count}; + image_buffers.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; - for (u32 index = bindings.image; index < indices; ++index) { - header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{};", - bindings.image, format, index); - } + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image, + format, bindings.image, array_decorator); bindings.image += desc.count; } - image_bindings.reserve(info.image_descriptors.size()); + images.reserve(info.image_descriptors.size()); for (const auto& desc : info.image_descriptors) { - image_bindings.push_back(bindings.image); + images.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; const auto image_type{ImageType(desc.type)}; const auto qualifier{desc.is_written ? "" : "readonly "}; - const auto indices{bindings.image + desc.count}; - for (u32 index = bindings.image; index < indices; ++index) { - header += fmt::format("layout(binding={}{})uniform {}{} img{};", bindings.image, format, - qualifier, image_type, index); - } + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, + qualifier, image_type, bindings.image, array_decorator); bindings.image += desc.count; } } void EmitContext::SetupTextures(Bindings& bindings) { - texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); + texture_buffers.reserve(info.texture_buffer_descriptors.size()); for (const auto& desc : info.texture_buffer_descriptors) { - texture_buffer_bindings.push_back(bindings.texture); + texture_buffers.push_back({bindings.texture, desc.count}); const auto sampler_type{SamplerType(TextureType::Buffer, false)}; - const auto indices{bindings.texture + desc.count}; - for (u32 index = bindings.texture; index < indices; ++index) { - header += fmt::format("layout(binding={}) uniform {} tex{};", bindings.texture, - sampler_type, index); - } + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); bindings.texture += desc.count; } - texture_bindings.reserve(info.texture_descriptors.size()); + textures.reserve(info.texture_descriptors.size()); for (const auto& desc : info.texture_descriptors) { + textures.push_back({bindings.texture, desc.count}); const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; - texture_bindings.push_back(bindings.texture); - const auto indices{bindings.texture + desc.count}; - for (u32 index = bindings.texture; index < indices; ++index) { - header += fmt::format("layout(binding={}) uniform {} tex{};", bindings.texture, - sampler_type, index); - } + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); bindings.texture += desc.count; } } diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 9d8be0c9a..685f56089 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -36,6 +36,11 @@ struct GenericElementInfo { u32 num_components{}; }; +struct TextureImageDefinition { + u32 binding; + u32 count; +}; + class EmitContext { public: explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, @@ -142,10 +147,10 @@ public: std::string_view stage_name = "invalid"; std::string_view position_name = "gl_Position"; - std::vector texture_buffer_bindings; - std::vector image_buffer_bindings; - std::vector texture_bindings; - std::vector image_bindings; + std::vector texture_buffers; + std::vector image_buffers; + std::vector textures; + std::vector images; std::array, 32> output_generics{}; bool uses_y_direction{}; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 00fe288e2..6a98f7ac2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -12,20 +12,18 @@ namespace Shader::Backend::GLSL { namespace { -std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info) { - if (info.type == TextureType::Buffer) { - return fmt::format("tex{}", ctx.texture_buffer_bindings.at(info.descriptor_index)); - } else { - return fmt::format("tex{}", ctx.texture_bindings.at(info.descriptor_index)); - } +std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index) + : ctx.textures.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("tex{}{}", def.binding, index_offset); } -std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info) { - if (info.type == TextureType::Buffer) { - return fmt::format("img{}", ctx.image_buffer_bindings.at(info.descriptor_index)); - } else { - return fmt::format("img{}", ctx.image_bindings.at(info.descriptor_index)); - } +std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index) + : ctx.images.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("img{}{}", def.binding, index_offset); } std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { @@ -137,14 +135,14 @@ IR::Inst* PrepareSparse(IR::Inst& inst) { } } // Anonymous namespace -void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view bias_lc, const IR::Value& offset) { +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset) { const auto info{inst.Flags()}; if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; @@ -175,9 +173,9 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view lod_lc, const IR::Value& offset) { +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset) { const auto info{inst.Flags()}; if (info.has_bias) { throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples"); @@ -185,7 +183,7 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -208,8 +206,7 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view dref, std::string_view bias_lc, const IR::Value& offset) { const auto info{inst.Flags()}; @@ -223,7 +220,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; @@ -263,8 +260,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view dref, std::string_view lod_lc, const IR::Value& offset) { const auto info{inst.Flags()}; @@ -278,7 +274,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; @@ -313,10 +309,10 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -355,11 +351,11 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR info.gather_component); } -void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& offset, const IR::Value& offset2, std::string_view dref) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -395,7 +391,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] cons *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel); } -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, [[maybe_unused]] std::string_view ms) { const auto info{inst.Flags()}; @@ -405,7 +401,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR: if (info.has_lod_clamp) { throw NotImplementedException("EmitImageFetch Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto sparse_inst{PrepareSparse(inst)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; if (!sparse_inst) { @@ -433,10 +429,10 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR: } } -void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view lod) { +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view lod) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; switch (info.type) { case TextureType::Color1D: return ctx.AddU32x4( @@ -460,14 +456,14 @@ void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, throw LogicError("Unspecified image type {}", info.type.Value()); } -void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords); } -void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& derivatives, const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) { const auto info{inst.Flags()}; @@ -481,7 +477,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const if (!offset.IsEmpty()) { throw NotImplementedException("EmitImageGradient offset"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; @@ -494,65 +490,60 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const } } -void EmitImageRead(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords) { const auto info{inst.Flags()}; const auto sparse_inst{PrepareSparse(inst)}; if (sparse_inst) { throw NotImplementedException("EmitImageRead Sparse"); } - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, TexelFetchCastToInt(coords, info)); } -void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view color) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.Add("imageStore({},{},{});", image, TexelFetchCastToInt(coords, info), color); } -void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), value); } @@ -567,35 +558,34 @@ void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string NotImplemented(); } -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index 7aa6096e6..49fba9073 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -28,12 +28,12 @@ void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::stri void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, std::string_view true_value, std::string_view false_value) { - ctx.AddU32("{}={}?uint({}):uint({});", inst, cond, true_value, false_value); + ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); } void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, std::string_view true_value, std::string_view false_value) { - ctx.AddU64("{}={}?uint64_t({}):uint64_t({});", inst, cond, true_value, false_value); + ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value); } void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp index 7abc6575f..8a13bf617 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -9,6 +9,17 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view bit_offset, u32 num_bits) { + const auto smem{fmt::format("smem[{}>>2]", offset)}; + ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits); +} +} // Anonymous namespace void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); } @@ -39,13 +50,13 @@ void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offse } void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { - ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int({}%4)*8,8);", offset, offset, value, - offset); + const auto bit_offset{fmt::format("int({}%4)*8", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 8); } void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { - ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int(({}>>1)%2)*16,16);", offset, offset, - value, offset); + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 16); } void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index ac6f33e34..5109985f1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,7 +13,7 @@ namespace OpenGL { -static void LogShader(GLuint shader, std::optional code = {}) { +static void LogShader(GLuint shader, std::string_view code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); if (shader_status == GL_FALSE) { @@ -28,8 +28,8 @@ static void LogShader(GLuint shader, std::optional code = {}) glGetShaderInfoLog(shader, log_length, nullptr, log.data()); if (shader_status == GL_FALSE) { LOG_ERROR(Render_OpenGL, "{}", log); - if (code.has_value()) { - LOG_INFO(Render_OpenGL, "\n{}", *code); + if (!code.empty()) { + LOG_INFO(Render_OpenGL, "\n{}", code); } } else { LOG_WARNING(Render_OpenGL, "{}", log); -- cgit v1.2.3 From 413eb6983f07bb4139cd07c5dca22bdb30e6af2d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 02:06:11 -0400 Subject: gl_shader_cache: Move OGL shader compilation to the respective Pipeline constructor --- .../renderer_opengl/gl_compute_pipeline.cpp | 13 +++- .../renderer_opengl/gl_compute_pipeline.h | 2 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 67 ++++++++++++++++++--- .../renderer_opengl/gl_graphics_pipeline.h | 4 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 69 +++------------------- 5 files changed, 79 insertions(+), 76 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index a40106c87..f984b635c 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -7,6 +7,7 @@ #include "common/cityhash.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { @@ -39,10 +40,16 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_) + const std::string code) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, - source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { + if (device.UseAssemblyShaders()) { + assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + } else { + source_program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); + LinkProgram(source_program.handle); + } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index b5dfb65e9..a93166eb6 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_); + const std::string code); void Configure(); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a2ea35d5a..4d62d7062 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -9,6 +9,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/texture_cache/texture_cache.h" @@ -33,6 +34,40 @@ u32 AccumulateCount(const Range& range) { return num; } +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} + +GLenum AssemblyStage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_PROGRAM_NV; + case 1: + return GL_TESS_CONTROL_PROGRAM_NV; + case 2: + return GL_TESS_EVALUATION_PROGRAM_NV; + case 3: + return GL_GEOMETRY_PROGRAM_NV; + case 4: + return GL_FRAGMENT_PROGRAM_NV; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} + /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -82,19 +117,33 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, + const std::array assembly_sources, + const std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, - gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, - state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( - assembly_programs_)} { + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - - for (size_t stage = 0; stage < 5; ++stage) { - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + if (device.UseAssemblyShaders()) { + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{assembly_sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + } + } else { + program.handle = glCreateProgram(); + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{glsl_sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } + LinkProgram(program.handle); } u32 num_textures{}; u32 num_images{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 508fad5bb..984bf994f 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -65,8 +65,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, + const std::array assembly_sources, + const std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4fcf4e458..884739aec 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -56,40 +56,6 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -GLenum Stage(size_t stage_index) { - switch (stage_index) { - case 0: - return GL_VERTEX_SHADER; - case 1: - return GL_TESS_CONTROL_SHADER; - case 2: - return GL_TESS_EVALUATION_SHADER; - case 3: - return GL_GEOMETRY_SHADER; - case 4: - return GL_FRAGMENT_SHADER; - } - UNREACHABLE_MSG("{}", stage_index); - return GL_NONE; -} - -GLenum AssemblyStage(size_t stage_index) { - switch (stage_index) { - case 0: - return GL_VERTEX_PROGRAM_NV; - case 1: - return GL_TESS_CONTROL_PROGRAM_NV; - case 2: - return GL_TESS_EVALUATION_PROGRAM_NV; - case 3: - return GL_GEOMETRY_PROGRAM_NV; - case 4: - return GL_FRAGMENT_PROGRAM_NV; - } - UNREACHABLE_MSG("{}", stage_index); - return GL_NONE; -} - Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, bool glasm_use_storage_buffers, bool use_assembly_shaders) { @@ -426,12 +392,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( std::array infos{}; OGLProgram source_program; - std::array assembly_programs; + std::array assembly_sources; + std::array glsl_sources; Shader::Backend::Bindings binding; const bool use_glasm{device.UseAssemblyShaders()}; - if (!use_glasm) { - source_program.handle = glCreateProgram(); - } const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { @@ -446,20 +410,14 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { - const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; - assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); + assembly_sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { - const auto code{EmitGLSL(profile, runtime_info, program, binding)}; - AttachShader(Stage(stage_index), source_program.handle, code); + glsl_sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } } - if (!use_glasm) { - LinkProgram(source_program.handle); - } return std::make_unique( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - std::move(source_program), std::move(assembly_programs), infos, - key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + assembly_sources, glsl_sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -496,21 +454,10 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& } Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - - OGLAssemblyProgram asm_program; - OGLProgram source_program; - if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, info, program)}; - asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); - } else { - const auto code{EmitGLSL(profile, program)}; - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); - LinkProgram(source_program.handle); - } + const std::string code{device.UseAssemblyShaders() ? EmitGLASM(profile, info, program) + : EmitGLSL(profile, program)}; return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - kepler_compute, program_manager, program.info, - std::move(source_program), std::move(asm_program)); + kepler_compute, program_manager, program.info, code); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); return nullptr; -- cgit v1.2.3 From ff3de0fb6bb46bcb59421cef203ca8e8daaec85c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 02:11:13 -0400 Subject: gl_shader_cache: Remove const from pipeline source arguments --- src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 2 +- src/video_core/renderer_opengl/gl_compute_pipeline.h | 2 +- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 4 ++-- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index f984b635c..2d6442d74 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -40,7 +40,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - const std::string code) + std::string code) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { if (device.UseAssemblyShaders()) { diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index a93166eb6..b5fc45f26 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - const std::string code); + std::string code); void Configure(); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 4d62d7062..d64723d6b 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -117,8 +117,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - const std::array assembly_sources, - const std::array glsl_sources, + std::array assembly_sources, + std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 984bf994f..dc791be53 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -65,8 +65,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - const std::array assembly_sources, - const std::array glsl_sources, + std::array assembly_sources, + std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); -- cgit v1.2.3 From 5e7b2b9661bf685c3950d7c4065d0d35b488f95c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 13 Jun 2021 00:05:19 -0400 Subject: glsl: Add stubs for sparse queries and variable aoffi when not supported --- .../backend/glsl/emit_context.cpp | 2 +- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 2 +- .../backend/glsl/emit_glsl_image.cpp | 46 ++++++++++++++++------ src/shader_recompiler/profile.h | 2 + src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 + 7 files changed, 47 insertions(+), 13 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index f0e9dffc2..d0880bdcb 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -384,7 +384,7 @@ void EmitContext::SetupExtensions() { profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } - if (info.uses_sparse_residency) { + if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { header += "#extension GL_ARB_sparse_texture2 : enable\n"; } if (info.stores_viewport_mask && profile.support_viewport_mask) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index d76b63b2d..6d64913bb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -215,7 +215,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR ctx.header += fmt::format("shared uint smem[{}];", Common::AlignUp(program.shared_memory_size, 4)); } - ctx.header += "\nvoid main(){\n"; + ctx.header += "void main(){\n"; if (program.stage == Stage::VertexA || program.stage == Stage::VertexB) { ctx.header += "gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"; // TODO: Properly resolve attribute issues diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 51181d1c1..c6b3df9c9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -94,7 +94,11 @@ std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { break; } } - const auto offset_str{ctx.var_alloc.Consume(offset)}; + const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi}; + if (!has_var_aoffi) { + // LOG_WARNING("Device does not support variable texture offsets, STUBBING"); + } + const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"}; switch (offset.Type()) { case IR::Type::U32: return fmt::format("int({})", offset_str); @@ -146,7 +150,12 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (!offset.IsEmpty()) { const auto offset_str{GetOffsetVec(ctx, offset)}; if (ctx.stage == Stage::Fragment) { @@ -163,7 +172,6 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu } return; } - // TODO: Query sparseTexels extension support if (!offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias); @@ -186,7 +194,12 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (!offset.IsEmpty()) { ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, GetOffsetVec(ctx, offset)); @@ -195,7 +208,6 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu } return; } - // TODO: Query sparseTexels extension support if (!offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, @@ -315,7 +327,12 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (offset.IsEmpty()) { ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, info.gather_component); @@ -332,7 +349,6 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, info.gather_component); return; } - // TODO: Query sparseTexels extension support if (offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));", *sparse_inst, texture, coords, texel, info.gather_component); @@ -358,7 +374,12 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (offset.IsEmpty()) { ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); return; @@ -373,7 +394,6 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets); return; } - // TODO: Query sparseTexels extension support if (offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst, texture, coords, dref, texel); @@ -404,7 +424,12 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const auto texture{Texture(ctx, info, index)}; const auto sparse_inst{PrepareSparse(inst)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (!offset.empty()) { ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); @@ -418,7 +443,6 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } return; } - // TODO: Query sparseTexels extension support if (!offset.empty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod, diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 246995190..236c79a0a 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -87,6 +87,8 @@ struct Profile { bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; bool support_gl_warp_intrinsics{}; + bool support_gl_variable_aoffi{}; + bool support_gl_sparse_textures{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 20ea42cff..bf08a6d93 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; + has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ff0ff2b08..0b59c9df0 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -128,6 +128,10 @@ public: return has_amd_shader_half_float; } + bool HasSparseTexture2() const { + return has_sparse_texture_2; + } + bool IsWarpSizePotentiallyLargerThanGuest() const { return warp_size_potentially_larger_than_guest; } @@ -165,6 +169,7 @@ private: bool has_depth_buffer_float{}; bool has_nv_gpu_shader_5{}; bool has_amd_shader_half_float{}; + bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 884739aec..3d59d34d7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -193,6 +193,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .support_gl_warp_intrinsics = false, + .support_gl_variable_aoffi = device.HasVariableAoffi(), + .support_gl_sparse_textures = device.HasSparseTexture2(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 74f683787eeba7b6e8f5868134f445240733f8fd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 21:06:29 -0400 Subject: gl_shader_cache: Implement async shaders --- src/video_core/CMakeLists.txt | 1 + .../renderer_opengl/gl_graphics_pipeline.cpp | 123 ++++++++++++--------- .../renderer_opengl/gl_graphics_pipeline.h | 14 ++- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 54 +++++---- src/video_core/renderer_opengl/gl_shader_cache.h | 34 ++---- src/video_core/renderer_opengl/gl_shader_context.h | 33 ++++++ 7 files changed, 154 insertions(+), 107 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_shader_context.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1ef3a6189..007ecc13e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -83,6 +83,7 @@ add_library(video_core STATIC renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h + renderer_opengl/gl_shader_context.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state_tracker.cpp diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index d64723d6b..d27a3cf46 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -6,11 +6,13 @@ #include #include "common/cityhash.h" +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/shader_notify.h" #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { @@ -117,74 +119,91 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - std::array assembly_sources, - std::array glsl_sources, + ShaderWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, + std::array sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - if (device.UseAssemblyShaders()) { - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{assembly_sources[stage]}; - if (code.empty()) { - continue; + auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { + if (device.UseAssemblyShaders()) { + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; } - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; - } - } else { - program.handle = glCreateProgram(); - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{glsl_sources[stage]}; - if (code.empty()) { - continue; + } else { + program.handle = glCreateProgram(); + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); } - AttachShader(Stage(stage), program.handle, code); + LinkProgram(program.handle); } - LinkProgram(program.handle); - } - u32 num_textures{}; - u32 num_images{}; - u32 num_storage_buffers{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + if (shader_notify) { + shader_notify->MarkShaderComplete(); } - enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; - std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + const auto& info{stage_infos[stage]}; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += + AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += + AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); - writes_global_memory |= std::ranges::any_of( - info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); - const bool assembly_shaders{assembly_programs[0].handle != 0}; - use_storage_buffers = - !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - writes_global_memory &= !use_storage_buffers; + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); + if (assembly_shaders && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } + is_built.store(true, std::memory_order_relaxed); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(nullptr); } } diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index dc791be53..58deafd3c 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -20,10 +20,15 @@ namespace OpenGL { +namespace ShaderContext { +struct Context; +} + class Device; class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using ShaderWorker = Common::StatefulThreadWorker; struct GraphicsPipelineKey { std::array unique_hashes; @@ -65,8 +70,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - std::array assembly_sources, - std::array glsl_sources, + ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, + std::array sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); @@ -82,6 +87,10 @@ public: return writes_global_memory; } + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); + } + private: void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); @@ -108,6 +117,7 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; + std::atomic_bool is_built{false}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7513bd071..e3d336f86 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -70,7 +70,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, - buffer_cache, program_manager, state_tracker), + buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3d59d34d7..d082b9f73 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -17,7 +17,6 @@ #include "common/scope_exit.h" #include "common/thread_worker.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -50,6 +49,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; using VideoCommon::SerializePipeline; +using Context = ShaderContext::Context; template auto MakeSpan(Container& container) { @@ -143,25 +143,17 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs } } // Anonymous namespace -struct ShaderCache::Context { - explicit Context(Core::Frontend::EmuWindow& emu_window) - : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} - - std::unique_ptr gl_context; - Core::Frontend::GraphicsContext::Scoped scoped; - ShaderPools pools; -}; - ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, TextureCache& texture_cache_, BufferCache& buffer_cache_, - ProgramManager& program_manager_, StateTracker& state_tracker_) + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, - use_asynchronous_shaders{device.UseAsynchronousShaders()}, + shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()}, profile{ .supported_spirv = 0x00010000, @@ -264,7 +256,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs))}; + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; std::lock_guard lock{state.mutex}; if (pipeline) { graphics_cache.emplace(key, std::move(pipeline)); @@ -311,6 +303,9 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (is_new) { program = CreateGraphicsPipeline(); } + if (!program || !program->IsBuilt()) { + return nullptr; + } return program.get(); } @@ -339,7 +334,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span())}; + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), + use_asynchronous_shaders)}; if (!pipeline || shader_cache_filename.empty()) { return pipeline; } @@ -354,8 +350,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { } std::unique_ptr ShaderCache::CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs) try { + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs, bool build_in_parallel) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{}; u32 total_storage_buffers{}; @@ -394,8 +390,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( std::array infos{}; OGLProgram source_program; - std::array assembly_sources; - std::array glsl_sources; + std::array sources; Shader::Backend::Bindings binding; const bool use_glasm{device.UseAssemblyShaders()}; const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; @@ -412,14 +407,16 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { - assembly_sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); + sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { - glsl_sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); + sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } } + auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; return std::make_unique( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - assembly_sources, glsl_sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + thread_worker, notify, sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -442,9 +439,9 @@ std::unique_ptr ShaderCache::CreateComputePipeline( return pipeline; } -std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, - const ComputePipelineKey& key, - Shader::Environment& env) try { +std::unique_ptr ShaderCache::CreateComputePipeline( + ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, + Shader::Environment& env) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -465,11 +462,10 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return nullptr; } -std::unique_ptr> ShaderCache::CreateWorkers() - const { - return std::make_unique>( - std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", - [this] { return Context{emu_window}; }); +std::unique_ptr ShaderCache::CreateWorkers() const { + return std::make_unique(std::max(std::thread::hardware_concurrency(), 2U) - 1, + "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index e0c5a06d8..d24b54d90 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -13,13 +13,12 @@ #include "common/common_types.h" #include "common/thread_worker.h" -#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_context.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -31,29 +30,17 @@ namespace OpenGL { class Device; class ProgramManager; class RasterizerOpenGL; - -struct ShaderPools { - void ReleaseContents() { - flow_block.ReleaseContents(); - block.ReleaseContents(); - inst.ReleaseContents(); - } - - Shader::ObjectPool inst; - Shader::ObjectPool block; - Shader::ObjectPool flow_block; -}; +using ShaderWorker = Common::StatefulThreadWorker; class ShaderCache : public VideoCommon::ShaderCache { - struct Context; - public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, TextureCache& texture_cache_, BufferCache& buffer_cache_, - ProgramManager& program_manager_, StateTracker& state_tracker_); + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_); ~ShaderCache(); void LoadDiskResources(u64 title_id, std::stop_token stop_loading, @@ -67,17 +54,17 @@ private: std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs); + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs, bool build_in_parallel); std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader); - std::unique_ptr CreateComputePipeline(ShaderPools& pools, + std::unique_ptr CreateComputePipeline(ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env); - std::unique_ptr> CreateWorkers() const; + std::unique_ptr CreateWorkers() const; Core::Frontend::EmuWindow& emu_window; const Device& device; @@ -85,17 +72,18 @@ private: BufferCache& buffer_cache; ProgramManager& program_manager; StateTracker& state_tracker; + VideoCore::ShaderNotify& shader_notify; GraphicsPipelineKey graphics_key{}; const bool use_asynchronous_shaders; - ShaderPools main_pools; + ShaderContext::ShaderPools main_pools; std::unordered_map> graphics_cache; std::unordered_map> compute_cache; Shader::Profile profile; std::filesystem::path shader_cache_filename; - std::unique_ptr> workers; + std::unique_ptr workers; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h new file mode 100644 index 000000000..6ff34e5d6 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_context.h @@ -0,0 +1,33 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/frontend/emu_window.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" + +namespace OpenGL::ShaderContext { +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); + } + + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; +}; + +struct Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; +}; + +} // namespace OpenGL::ShaderContext -- cgit v1.2.3 From 6eea88d6149f7122777b325c7fc8549e2a974e64 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 23:02:07 -0400 Subject: glsl: Cleanup/Address feedback --- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 8 ++++---- .../backend/glsl/emit_glsl_atomic.cpp | 12 ++++++------ .../backend/glsl/emit_glsl_composite.cpp | 3 +-- .../backend/glsl/emit_glsl_context_get_set.cpp | 18 +++++------------- .../backend/glsl/emit_glsl_integer.cpp | 1 + .../backend/glsl/emit_glsl_shared_memory.cpp | 1 + src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | 2 +- src/shader_recompiler/backend/glsl/var_alloc.cpp | 3 +-- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ 10 files changed, 24 insertions(+), 28 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 6d64913bb..9f8cf659f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -156,8 +156,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("for(;;){{"); break; case IR::AbstractSyntaxNode::Type::Repeat: - ctx.Add("if({}){{continue;}}else{{break;}}}}", - ctx.var_alloc.Consume(node.data.repeat.cond)); + ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond)); break; default: throw NotImplementedException("AbstractSyntaxNode Type {}", node.type); @@ -166,7 +165,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } std::string GlslVersionSpecifier(const EmitContext& ctx) { - if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings) { + if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings || ctx.info.loads_legacy_varyings) { return " compatibility"; } return ""; @@ -187,7 +186,8 @@ void DefineVariables(const EmitContext& ctx, std::string& header) { const auto type{static_cast(i)}; const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; const auto type_name{ctx.var_alloc.GetGlslType(type)}; - const auto precise{IsPreciseType(type) ? "precise " : ""}; + const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug}; + const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""}; // Temps/return types that are never used are stored at index 0 if (tracker.uses_temp) { header += fmt::format("{}{} t{}={}(0);", precise, type_name, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 9152ace98..772acc5a4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -98,7 +98,7 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, pointer_offset); ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", @@ -171,7 +171,7 @@ void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Val void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -182,7 +182,7 @@ void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -195,7 +195,7 @@ void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -207,7 +207,7 @@ void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -220,7 +220,7 @@ void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 7421ce97d..49a66e3ec 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -17,8 +17,7 @@ void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view // The result is aliased with the composite ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } else { - ctx.Add("{}={};", result, composite); - ctx.Add("{}.{}={};", result, SWIZZLE[index], object); + ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object); } } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 0d1e5ed7f..edeecc26e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -30,7 +30,7 @@ std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; } -std::string OutputVertexIndex(EmitContext& ctx) { +std::string_view OutputVertexIndex(EmitContext& ctx) { return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; } @@ -40,7 +40,7 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, const bool is_immediate{offset.IsImmediate()}; if (is_immediate) { const s32 signed_offset{static_cast(offset.U32())}; - static constexpr u32 cbuf_size{4096 * 16}; + static constexpr u32 cbuf_size{0x10000}; if (signed_offset < 0 || offset.U32() > cbuf_size) { LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); ctx.Add("{}=0u;", ret); @@ -140,7 +140,7 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding const IR::Value& offset) { const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { - static constexpr u32 cbuf_size{4096 * 16}; + static constexpr u32 cbuf_size{0x10000}; const u32 u32_offset{offset.U32()}; const s32 signed_offset{static_cast(offset.U32())}; if (signed_offset < 0 || u32_offset > cbuf_size) { @@ -308,21 +308,13 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val case IR::Attribute::ColorFrontDiffuseG: case IR::Attribute::ColorFrontDiffuseB: case IR::Attribute::ColorFrontDiffuseA: - if (ctx.stage == Stage::Fragment) { - ctx.Add("gl_Color.{}={};", swizzle, value); - } else { - ctx.Add("gl_FrontColor.{}={};", swizzle, value); - } + ctx.Add("gl_FrontColor.{}={};", swizzle, value); break; case IR::Attribute::ColorFrontSpecularR: case IR::Attribute::ColorFrontSpecularG: case IR::Attribute::ColorFrontSpecularB: case IR::Attribute::ColorFrontSpecularA: - if (ctx.stage == Stage::Fragment) { - ctx.Add("gl_SecondaryColor.{}={};", swizzle, value); - } else { - ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); - } + ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); break; case IR::Attribute::ColorBackDiffuseR: case IR::Attribute::ColorBackDiffuseG: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 7a2f79d10..983e6d95d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -28,6 +28,7 @@ void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { sign->Invalidate(); } } // Anonymous namespace + void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp index 8a13bf617..518b78f06 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -20,6 +20,7 @@ void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits); } } // Anonymous namespace + void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 7047928fd..4d418cbbc 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -43,7 +43,7 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); SetInBoundsFlag(ctx, inst); } -} // namespace +} // Anonymous namespace void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp index cbf56491c..194f926ca 100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -177,8 +177,7 @@ Id VarAlloc::Alloc(GlslVarType type) { void VarAlloc::Free(Id id) { if (id.is_valid == 0) { - // throw LogicError("Freeing invalid variable"); - return; + throw LogicError("Freeing invalid variable"); } auto& use_tracker{GetUseTracker(id.type)}; use_tracker.var_use[id.index] = false; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 236c79a0a..6db794e91 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -105,6 +105,8 @@ struct Profile { bool has_broken_signed_operations{}; /// Dynamic vec4 indexing is broken on some OpenGL drivers bool has_gl_component_indexing_bug{}; + /// The precise type qualifier is broken in the fragment stage of some drivers + bool has_gl_precise_bug{}; /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d082b9f73..5ffe28d45 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -196,6 +196,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, + .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), + .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, } { if (use_asynchronous_shaders) { -- cgit v1.2.3 From 3b339fbbf65a50ec2ec8baacd175ca7577c3b8bd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 23:33:26 -0400 Subject: glsl: Conditionally use fine/coarse derivatives based on device support --- .../backend/glsl/emit_context.cpp | 3 +++ .../backend/glsl/emit_glsl_warp.cpp | 28 ++++++++++++++++++---- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 4 files changed, 29 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index e18f8257e..0e8fe017d 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -395,6 +395,9 @@ void EmitContext::SetupExtensions() { if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) { header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; } + if (info.uses_derivatives && profile.support_gl_derivative_control) { + header += "#extension GL_ARB_derivative_control : enable\n"; + } } void EmitContext::DefineConstantBuffers(Bindings& bindings) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 4d418cbbc..a982dd8a2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -180,18 +180,38 @@ void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, st } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdxFine({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } } void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdyFine({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } } void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } } void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 6db794e91..e8cfc03af 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -89,6 +89,7 @@ struct Profile { bool support_gl_warp_intrinsics{}; bool support_gl_variable_aoffi{}; bool support_gl_sparse_textures{}; + bool support_gl_derivative_control{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5ffe28d45..fedbce2f0 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -187,6 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_warp_intrinsics = false, .support_gl_variable_aoffi = device.HasVariableAoffi(), .support_gl_sparse_textures = device.HasSparseTexture2(), + .support_gl_derivative_control = device.HasDerivativeControl(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From d36f667bc0adaa9f50d53efb4c908aadc38921a6 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 15 Jun 2021 17:23:57 -0400 Subject: glsl: Address rest of feedback --- .../backend/glsl/emit_context.cpp | 44 +++++++++++++++++----- src/shader_recompiler/backend/glsl/emit_context.h | 2 + .../backend/glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_special.cpp | 22 +++++++---- src/shader_recompiler/ir_opt/texture_pass.cpp | 11 +++++- src/shader_recompiler/profile.h | 2 + src/shader_recompiler/shader_info.h | 2 + src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++ .../renderer_opengl/gl_graphics_pipeline.cpp | 32 ++++++++-------- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 11 files changed, 86 insertions(+), 38 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 0e8fe017d..d224c4d84 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -148,6 +148,16 @@ std::string_view ImageFormatString(ImageFormat format) { } } +std::string_view ImageAccessQualifier(bool is_written, bool is_read) { + if (is_written && !is_read) { + return "writeonly "; + } + if (is_read && !is_written) { + return "readonly "; + } + return ""; +} + std::string_view GetTessMode(TessPrimitive primitive) { switch (primitive) { case TessPrimitive::Triangles: @@ -262,7 +272,9 @@ void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { - header += "#pragma optionNV(fastmath off)\n"; + if (profile.need_fastmath_off) { + header += "#pragma optionNV(fastmath off)\n"; + } SetupExtensions(); stage = program.stage; switch (program.stage) { @@ -335,7 +347,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } for (size_t index = 0; index < info.stores_generics.size(); ++index) { // TODO: Properly resolve attribute issues - if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { + if (info.stores_generics[index] || StageInitializesVaryings()) { DefineGenericOutput(index, program.invocations); } } @@ -347,6 +359,17 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineConstants(); } +bool EmitContext::StageInitializesVaryings() const noexcept { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + case Stage::Geometry: + return true; + default: + return false; + } +} + void EmitContext::SetupExtensions() { if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; @@ -361,7 +384,7 @@ void EmitContext::SetupExtensions() { header += "#extension GL_NV_shader_atomic_float : enable\n"; } if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { - header += "#extension NV_shader_atomic_fp16_vector : enable\n"; + header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; } if (info.uses_fp16) { if (profile.support_gl_nv_gpu_shader_5) { @@ -392,7 +415,7 @@ void EmitContext::SetupExtensions() { if (info.stores_viewport_mask && profile.support_viewport_mask) { header += "#extension GL_NV_viewport_array2 : enable\n"; } - if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) { + if (info.uses_typeless_image_reads) { header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; } if (info.uses_derivatives && profile.support_gl_derivative_control) { @@ -593,9 +616,9 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); } - write_func += "}"; - write_func_64 += "}"; - write_func_128 += "}"; + write_func += '}'; + write_func_64 += '}'; + write_func_128 += '}'; load_func += "return 0u;}"; load_func_64 += "return uvec2(0);}"; load_func_128 += "return uvec4(0);}"; @@ -607,9 +630,10 @@ void EmitContext::SetupImages(Bindings& bindings) { for (const auto& desc : info.image_buffer_descriptors) { image_buffers.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; - header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image, - format, bindings.image, array_decorator); + header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", + bindings.image, format, qualifier, bindings.image, array_decorator); bindings.image += desc.count; } images.reserve(info.image_descriptors.size()); @@ -617,7 +641,7 @@ void EmitContext::SetupImages(Bindings& bindings) { images.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; const auto image_type{ImageType(desc.type)}; - const auto qualifier{desc.is_written ? "" : "readonly "}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, qualifier, image_type, bindings.image, array_decorator); diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 8fa87c02c..4a50556e1 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -136,6 +136,8 @@ public: code += '\n'; } + [[nodiscard]] bool StageInitializesVaryings() const noexcept; + std::string header; std::string code; VarAlloc var_alloc; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index edeecc26e..a241d18fe 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -329,7 +329,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); break; case IR::Attribute::FogCoordinate: - ctx.Add("gl_FogFragCoord.x={};", value); + ctx.Add("gl_FogFragCoord={};", value); break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index cfef58d79..59ca52f07 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -10,6 +10,17 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { +void InitializeVaryings(EmitContext& ctx) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + // TODO: Properly resolve attribute issues + for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { + if (!ctx.info.stores_generics[index]) { + ctx.Add("out_attr{}=vec4(0,0,0,1);", index); + } + } +} +} // Anonymous namespace void EmitPhi(EmitContext& ctx, IR::Inst& phi) { const size_t num_args{phi.NumArgs()}; @@ -44,14 +55,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& } void EmitPrologue(EmitContext& ctx) { - if (ctx.stage == Stage::VertexA || ctx.stage == Stage::VertexB) { - ctx.Add("gl_Position=vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - // TODO: Properly resolve attribute issues - for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { - if (!ctx.info.stores_generics[index]) { - ctx.Add("out_attr{}=vec4(0,0,0,1);", index); - } - } + if (ctx.StageInitializesVaryings()) { + InitializeVaryings(ctx); } } @@ -59,6 +64,7 @@ void EmitEpilogue(EmitContext&) {} void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); + InitializeVaryings(ctx); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index e9098239d..737f186ab 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -312,11 +312,14 @@ public: } u32 Add(const ImageBufferDescriptor& desc) { - return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { + const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && desc.size_shift == existing.size_shift; - }); + })}; + image_buffer_descriptors[index].is_written |= desc.is_written; + image_buffer_descriptors[index].is_read |= desc.is_read; + return index; } u32 Add(const TextureDescriptor& desc) { @@ -339,6 +342,7 @@ public: desc.size_shift == existing.size_shift; })}; image_descriptors[index].is_written |= desc.is_written; + image_descriptors[index].is_read |= desc.is_read; return index; } @@ -430,10 +434,12 @@ void TexturePass(Environment& env, IR::Program& program) { throw NotImplementedException("Unexpected separate sampler"); } const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; + const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, .is_written = is_written, + .is_read = is_read, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, @@ -444,6 +450,7 @@ void TexturePass(Environment& env, IR::Program& program) { .type = flags.type, .format = flags.image_format, .is_written = is_written, + .is_read = is_read, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index e8cfc03af..a3c412a0f 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -97,6 +97,8 @@ struct Profile { /// Fragment outputs have to be declared even if they are not written to avoid undefined values. /// See Ori and the Blind Forest's main menu for reference. bool need_declared_frag_colors{}; + /// Prevents fast math optimizations that may cause inaccuracies + bool need_fastmath_off{}; /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 74d7a6a94..e9ebc16a4 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -75,6 +75,7 @@ using TextureBufferDescriptors = boost::container::small_vectorMarkShaderComplete(); + if (!device.UseAssemblyShaders()) { + LinkProgram(program.handle); } u32 num_textures{}; u32 num_images{}; @@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (assembly_shaders && xfb_state) { GenerateTransformFeedbackState(*xfb_state); } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } is_built.store(true, std::memory_order_relaxed); }}; if (thread_worker) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index fedbce2f0..620666622 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .lower_left_origin_mode = true, .need_declared_frag_colors = true, + .need_fastmath_off = device.NeedsFastmathOff(), .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, -- cgit v1.2.3 From 69f9b97e7ed1e873657105cff27ed9095ee277ed Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 15 Jun 2021 22:48:55 -0300 Subject: vulkan_device: Blacklist VK_EXT_vertex_input_dynamic_state on Intel --- src/video_core/vulkan_common/vulkan_device.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index bf063c047..9754abcf8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -491,6 +491,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); ext_extended_dynamic_state = false; } + if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { + LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); + ext_vertex_input_dynamic_state = false; + } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); -- cgit v1.2.3 From 376aa94819b7da976adb120136d83980a757d044 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 01:49:19 -0300 Subject: shader: Rename maxwell/program.h to translate_program.h --- src/shader_recompiler/CMakeLists.txt | 4 +- src/shader_recompiler/frontend/maxwell/program.cpp | 203 --------------------- src/shader_recompiler/frontend/maxwell/program.h | 27 --- .../frontend/maxwell/translate_program.cpp | 203 +++++++++++++++++++++ .../frontend/maxwell/translate_program.h | 22 +++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 7 files changed, 229 insertions(+), 234 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/program.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/program.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate_program.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate_program.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 06ee50fff..f801a9f72 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -114,8 +114,6 @@ add_library(shader_recompiler STATIC frontend/maxwell/maxwell.inc frontend/maxwell/opcodes.cpp frontend/maxwell/opcodes.h - frontend/maxwell/program.cpp - frontend/maxwell/program.h frontend/maxwell/structured_control_flow.cpp frontend/maxwell/structured_control_flow.h frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -211,6 +209,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/warp_shuffle.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h + frontend/maxwell/translate_program.cpp + frontend/maxwell/translate_program.h ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp deleted file mode 100644 index 8489f9a5f..000000000 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include -#include -#include -#include - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/post_order.h" -#include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" -#include "shader_recompiler/frontend/maxwell/translate/translate.h" -#include "shader_recompiler/ir_opt/passes.h" - -namespace Shader::Maxwell { -namespace { -IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { - auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { - return node.type == IR::AbstractSyntaxNode::Type::Block; - })}; - IR::BlockList blocks(std::ranges::distance(syntax_blocks)); - std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); - return blocks; -} - -void RemoveUnreachableBlocks(IR::Program& program) { - // Some blocks might be unreachable if a function call exists unconditionally - // If this happens the number of blocks and post order blocks will mismatch - if (program.blocks.size() == program.post_order_blocks.size()) { - return; - } - const auto begin{program.blocks.begin() + 1}; - const auto end{program.blocks.end()}; - const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; - program.blocks.erase(std::remove_if(begin, end, pred), end); -} - -void CollectInterpolationInfo(Environment& env, IR::Program& program) { - if (program.stage != Stage::Fragment) { - return; - } - const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { - std::optional imap; - for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { - if (value == PixelImap::Unused) { - continue; - } - if (imap && imap != value) { - throw NotImplementedException("Per component interpolation"); - } - imap = value; - } - if (!imap) { - continue; - } - program.info.input_generics[index].interpolation = [&] { - switch (*imap) { - case PixelImap::Unused: - case PixelImap::Perspective: - return Interpolation::Smooth; - case PixelImap::Constant: - return Interpolation::Flat; - case PixelImap::ScreenLinear: - return Interpolation::NoPerspective; - } - throw NotImplementedException("Unknown interpolation {}", *imap); - }(); - } -} - -void AddNVNStorageBuffers(IR::Program& program) { - if (!program.info.uses_global_memory) { - return; - } - const u32 driver_cbuf{0}; - const u32 descriptor_size{0x10}; - const u32 num_buffers{16}; - const u32 base{[&] { - switch (program.stage) { - case Stage::VertexA: - case Stage::VertexB: - return 0x110u; - case Stage::TessellationControl: - return 0x210u; - case Stage::TessellationEval: - return 0x310u; - case Stage::Geometry: - return 0x410u; - case Stage::Fragment: - return 0x510u; - case Stage::Compute: - return 0x310u; - } - throw InvalidArgument("Invalid stage {}", program.stage); - }()}; - auto& descs{program.info.storage_buffers_descriptors}; - for (u32 index = 0; index < num_buffers; ++index) { - if (!program.info.nvn_buffer_used[index]) { - continue; - } - const u32 offset{base + index * descriptor_size}; - const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; - if (it != descs.end()) { - it->is_written |= program.info.stores_global_memory; - continue; - } - descs.push_back({ - .cbuf_index = driver_cbuf, - .cbuf_offset = offset, - .count = 1, - .is_written = program.info.stores_global_memory, - }); - } -} -} // Anonymous namespace - -IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { - IR::Program program; - program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); - program.blocks = GenerateBlocks(program.syntax_list); - program.post_order_blocks = PostOrder(program.syntax_list.front()); - program.stage = env.ShaderStage(); - program.local_memory_size = env.LocalMemorySize(); - switch (program.stage) { - case Stage::TessellationControl: { - const ProgramHeader& sph{env.SPH()}; - program.invocations = sph.common2.threads_per_input_primitive; - break; - } - case Stage::Geometry: { - const ProgramHeader& sph{env.SPH()}; - program.output_topology = sph.common3.output_topology; - program.output_vertices = sph.common4.max_output_vertices; - program.invocations = sph.common2.threads_per_input_primitive; - break; - } - case Stage::Compute: - program.workgroup_size = env.WorkgroupSize(); - program.shared_memory_size = env.SharedMemorySize(); - break; - default: - break; - } - RemoveUnreachableBlocks(program); - - // Replace instructions before the SSA rewrite - Optimization::LowerFp16ToFp32(program); - - Optimization::SsaRewritePass(program); - - Optimization::GlobalMemoryToStorageBufferPass(program); - Optimization::TexturePass(env, program); - - Optimization::ConstantPropagationPass(program); - Optimization::DeadCodeEliminationPass(program); - Optimization::VerificationPass(program); - Optimization::CollectShaderInfoPass(env, program); - CollectInterpolationInfo(env, program); - AddNVNStorageBuffers(program); - return program; -} - -IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, - Environment& env_vertex_b) { - IR::Program result{}; - Optimization::VertexATransformPass(vertex_a); - Optimization::VertexBTransformPass(vertex_b); - for (const auto& term : vertex_a.syntax_list) { - if (term.type == IR::AbstractSyntaxNode::Type::Return) { - continue; - } - result.syntax_list.push_back(term); - } - for (const auto& term : vertex_b.syntax_list) { - result.syntax_list.push_back(term); - } - result.blocks = GenerateBlocks(result.syntax_list); - result.post_order_blocks = vertex_b.post_order_blocks; - for (const auto& block : vertex_a.post_order_blocks) { - result.post_order_blocks.push_back(block); - } - result.stage = Stage::VertexB; - result.info = vertex_a.info; - result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; - } - Optimization::JoinTextureInfo(result.info, vertex_b.info); - Optimization::JoinStorageInfo(result.info, vertex_b.info); - Optimization::DeadCodeEliminationPass(result); - Optimization::VerificationPass(result); - Optimization::CollectShaderInfoPass(env_vertex_b, result); - return result; -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h deleted file mode 100644 index f7f5930e4..000000000 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include - -#include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/ir/program.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/object_pool.h" - -namespace Shader::Maxwell { - -[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, - ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); - -[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, - Environment& env_vertex_b); - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp new file mode 100644 index 000000000..e52170e3e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -0,0 +1,203 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Maxwell { +namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { + return node.type == IR::AbstractSyntaxNode::Type::Block; + })}; + IR::BlockList blocks(std::ranges::distance(syntax_blocks)); + std::ranges::transform(syntax_blocks, blocks.begin(), + [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); + return blocks; +} + +void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const auto begin{program.blocks.begin() + 1}; + const auto end{program.blocks.end()}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; + program.blocks.erase(std::remove_if(begin, end, pred), end); +} + +void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& sph{env.SPH()}; + for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + std::optional imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { + if (value == PixelImap::Unused) { + continue; + } + if (imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + continue; + } + program.info.input_generics[index].interpolation = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + if (!program.info.nvn_buffer_used[index]) { + continue; + } + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + it->is_written |= program.info.stores_global_memory; + continue; + } + descs.push_back({ + .cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = program.info.stores_global_memory, + }); + } +} +} // Anonymous namespace + +IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::CFG& cfg) { + IR::Program program; + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); + program.stage = env.ShaderStage(); + program.local_memory_size = env.LocalMemorySize(); + switch (program.stage) { + case Stage::TessellationControl: { + const ProgramHeader& sph{env.SPH()}; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Geometry: { + const ProgramHeader& sph{env.SPH()}; + program.output_topology = sph.common3.output_topology; + program.output_vertices = sph.common4.max_output_vertices; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Compute: + program.workgroup_size = env.WorkgroupSize(); + program.shared_memory_size = env.SharedMemorySize(); + break; + default: + break; + } + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite + Optimization::LowerFp16ToFp32(program); + + Optimization::SsaRewritePass(program); + + Optimization::GlobalMemoryToStorageBufferPass(program); + Optimization::TexturePass(env, program); + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + Optimization::VerificationPass(program); + Optimization::CollectShaderInfoPass(env, program); + CollectInterpolationInfo(env, program); + AddNVNStorageBuffers(program); + return program; +} + +IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b) { + IR::Program result{}; + Optimization::VertexATransformPass(vertex_a); + Optimization::VertexBTransformPass(vertex_b); + for (const auto& term : vertex_a.syntax_list) { + if (term.type == IR::AbstractSyntaxNode::Type::Return) { + continue; + } + result.syntax_list.push_back(term); + } + for (const auto& term : vertex_b.syntax_list) { + result.syntax_list.push_back(term); + } + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } + result.stage = Stage::VertexB; + result.info = vertex_a.info; + result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); + for (size_t index = 0; index < 32; ++index) { + result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; + result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + } + Optimization::JoinTextureInfo(result.info, vertex_b.info); + Optimization::JoinStorageInfo(result.info, vertex_b.info); + Optimization::DeadCodeEliminationPass(result); + Optimization::VerificationPass(result); + Optimization::CollectShaderInfoPass(env_vertex_b, result); + return result; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h new file mode 100644 index 000000000..1e5536443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -0,0 +1,22 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + Flow::CFG& cfg); + +[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b); + +} // namespace Shader::Maxwell diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 620666622..c05cd5d28 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -22,7 +22,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b17f34cdd..0b6fe8e2e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -20,7 +20,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/program_header.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" -- cgit v1.2.3 From cbbca26d182991abf68d9b2e1b1e5935bf4eb476 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:03:08 -0300 Subject: shader: Add support for native 16-bit floats --- src/shader_recompiler/CMakeLists.txt | 1 + .../frontend/maxwell/translate_program.cpp | 8 +++++--- .../frontend/maxwell/translate_program.h | 3 ++- src/shader_recompiler/host_translate_info.h | 18 ++++++++++++++++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 12 ++++++++---- src/video_core/renderer_opengl/gl_shader_cache.h | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 ++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 3 +++ src/video_core/vulkan_common/vulkan_device.cpp | 4 ++-- 9 files changed, 50 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/host_translate_info.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index f801a9f72..164e94071 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -211,6 +211,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/translate.h frontend/maxwell/translate_program.cpp frontend/maxwell/translate_program.h + host_translate_info.h ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e52170e3e..5250509c1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -13,6 +13,7 @@ #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { @@ -120,7 +121,7 @@ void AddNVNStorageBuffers(IR::Program& program) { } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { IR::Program program; program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); program.blocks = GenerateBlocks(program.syntax_list); @@ -150,8 +151,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); + Flow::CFG& cfg, const HostTranslateInfo& host_info); [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, Environment& env_vertex_b); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h new file mode 100644 index 000000000..94a584219 --- /dev/null +++ b/src/shader_recompiler/host_translate_info.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +// Try to keep entries here to a minimum +// They can accidentally change the cached information in a shader + +/// Misc information about the host +struct HostTranslateInfo { + bool support_float16{}; ///< True when the device supports 16-bit floats + bool support_int64{}; ///< True when the device supports 64-bit integers +}; + +} // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c05cd5d28..b459397f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -201,6 +201,10 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, + }, + host_info{ + .support_float16 = false, + .support_int64 = true, } { if (use_asynchronous_shaders) { workers = CreateWorkers(); @@ -373,15 +377,15 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); for (const auto& desc : programs[index].info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; for (const auto& desc : program_vb.info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } @@ -449,7 +453,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline( LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; u32 num_storage_buffers{}; for (const auto& desc : program.info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index d24b54d90..6952a1f2c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" @@ -82,6 +83,8 @@ private: std::unordered_map> compute_cache; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path shader_cache_filename; std::unique_ptr workers; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0b6fe8e2e..72e6f4207 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -307,6 +307,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .has_broken_signed_operations = false, .ignore_nan_fp_comparisons = false, }; + host_info = Shader::HostTranslateInfo{ + .support_float16 = device.IsFloat16Supported(), + .support_int64 = true, + }; } PipelineCache::~PipelineCache() = default; @@ -484,11 +488,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } @@ -575,7 +579,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 167a2ee2e..42da2960b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,6 +19,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" @@ -157,6 +158,8 @@ private: ShaderPools main_pools; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9754abcf8..0d8c6cd08 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -497,8 +497,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - // is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); + is_float16_supported = false; } graphics_queue = logical.GetQueue(graphics_family); -- cgit v1.2.3 From 0ffea97e2ea2c8f58928e13dc2488d620ea98ea8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:22:56 -0300 Subject: shader: Split profile and runtime info headers --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/glasm/emit_context.cpp | 1 + src/shader_recompiler/backend/glasm/emit_glasm.cpp | 1 + src/shader_recompiler/backend/glasm/emit_glasm.h | 1 + .../backend/glasm/emit_glasm_memory.cpp | 2 +- .../backend/glsl/emit_context.cpp | 1 + src/shader_recompiler/backend/glsl/emit_glsl.h | 1 + .../backend/glsl/emit_glsl_instructions.h | 3 - src/shader_recompiler/backend/spirv/emit_context.h | 1 + src/shader_recompiler/profile.h | 72 ------------------- src/shader_recompiler/runtime_info.h | 83 ++++++++++++++++++++++ src/video_core/renderer_opengl/gl_shader_cache.h | 1 + src/video_core/transform_feedback.h | 2 +- 13 files changed, 93 insertions(+), 77 deletions(-) create mode 100644 src/shader_recompiler/runtime_info.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 164e94071..f6719ad9d 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -226,6 +226,7 @@ add_library(shader_recompiler STATIC object_pool.h profile.h program_header.h + runtime_info.h shader_info.h ) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 08918a5c2..21e14867c 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLASM { namespace { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 832b4fd40..66e4aea04 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -13,6 +13,7 @@ #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLASM { namespace { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index 3df32a4a6..bcb55f062 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -9,6 +9,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLASM { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index cafb5c92a..af9fac7c1 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -8,7 +8,7 @@ #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLASM { namespace { diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 54aa88b63..93057ebb9 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLSL { namespace { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h index fe221fa7c..20e5719e6 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h @@ -9,6 +9,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 9e812dabb..df28036e4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -18,9 +18,6 @@ class Value; namespace Shader::Backend::GLSL { class EmitContext; -inline void EmitSetLoopSafetyVariable(EmitContext&) {} -inline void EmitGetLoopSafetyVariable(EmitContext&) {} - #define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__) // Microinstruction emitters diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 961c9180c..527685fb8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -12,6 +12,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/shader_info.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index a3c412a0f..d46be1638 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -4,59 +4,10 @@ #pragma once -#include -#include -#include - #include "common/common_types.h" namespace Shader { -enum class AttributeType : u8 { - Float, - SignedInt, - UnsignedInt, - Disabled, -}; - -enum class InputTopology { - Points, - Lines, - LinesAdjacency, - Triangles, - TrianglesAdjacency, -}; - -enum class CompareFunction { - Never, - Less, - Equal, - LessThanEqual, - Greater, - NotEqual, - GreaterThanEqual, - Always, -}; - -enum class TessPrimitive { - Isolines, - Triangles, - Quads, -}; - -enum class TessSpacing { - Equal, - FractionalOdd, - FractionalEven, -}; - -struct TransformFeedbackVarying { - u32 buffer{}; - u32 stride{}; - u32 offset{}; - u32 components{}; -}; - struct Profile { u32 supported_spirv{0x00010000}; @@ -114,27 +65,4 @@ struct Profile { bool ignore_nan_fp_comparisons{}; }; -struct RuntimeInfo { - std::array generic_input_types{}; - bool convert_depth_mode{}; - bool force_early_z{}; - - TessPrimitive tess_primitive{}; - TessSpacing tess_spacing{}; - bool tess_clockwise{}; - - InputTopology input_topology{}; - - std::optional fixed_state_point_size; - std::optional alpha_test_func; - float alpha_test_reference{}; - - // Static y negate value - bool y_negate{}; - // Use storage buffers instead of global pointers on GLASM - bool glasm_use_storage_buffers{}; - - std::vector xfb_varyings; -}; - } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h new file mode 100644 index 000000000..d4b047b4d --- /dev/null +++ b/src/shader_recompiler/runtime_info.h @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" + +namespace Shader { + +enum class AttributeType : u8 { + Float, + SignedInt, + UnsignedInt, + Disabled, +}; + +enum class InputTopology { + Points, + Lines, + LinesAdjacency, + Triangles, + TrianglesAdjacency, +}; + +enum class CompareFunction { + Never, + Less, + Equal, + LessThanEqual, + Greater, + NotEqual, + GreaterThanEqual, + Always, +}; + +enum class TessPrimitive { + Isolines, + Triangles, + Quads, +}; + +enum class TessSpacing { + Equal, + FractionalOdd, + FractionalEven, +}; + +struct TransformFeedbackVarying { + u32 buffer{}; + u32 stride{}; + u32 offset{}; + u32 components{}; +}; + +struct RuntimeInfo { + std::array generic_input_types{}; + bool convert_depth_mode{}; + bool force_early_z{}; + + TessPrimitive tess_primitive{}; + TessSpacing tess_spacing{}; + bool tess_clockwise{}; + + InputTopology input_topology{}; + + std::optional fixed_state_point_size; + std::optional alpha_test_func; + float alpha_test_reference{}; + + // Static y negate value + bool y_negate{}; + // Use storage buffers instead of global pointers on GLASM + bool glasm_use_storage_buffers{}; + + std::vector xfb_varyings; +}; + +} // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6952a1f2c..ff5707119 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -16,6 +16,7 @@ #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h index 6832c6db1..8f6946d65 100644 --- a/src/video_core/transform_feedback.h +++ b/src/video_core/transform_feedback.h @@ -8,7 +8,7 @@ #include #include "common/common_types.h" -#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/engines/maxwell_3d.h" namespace VideoCommon { -- cgit v1.2.3 From 374eeda1a35f6a1dc81cf22122c701be68e89c0f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 04:59:30 -0300 Subject: shader: Properly manage attributes not written from previous stages --- .../backend/glsl/emit_context.cpp | 26 +++++++--------------- src/shader_recompiler/backend/glsl/emit_context.h | 2 -- .../backend/glsl/emit_glsl_context_get_set.cpp | 5 +++++ .../backend/glsl/emit_glsl_special.cpp | 18 +++++++-------- .../backend/spirv/emit_context.cpp | 3 +++ .../backend/spirv/emit_spirv_context_get_set.cpp | 2 +- .../frontend/maxwell/translate_program.cpp | 4 +++- .../ir_opt/collect_shader_info_pass.cpp | 6 +++-- src/shader_recompiler/runtime_info.h | 8 +++++-- src/shader_recompiler/shader_info.h | 2 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 11 ++++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 16 +++++++++---- 12 files changed, 62 insertions(+), 41 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index bd40356a1..14c009535 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -327,11 +327,12 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; - if (generic.used) { - header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, - InterpDecorator(generic.interpolation), index, - InputArrayDecorator(stage)); + if (!generic.used || !runtime_info.previous_stage_stores_generic[index]) { + continue; } + header += + fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, + InterpDecorator(generic.interpolation), index, InputArrayDecorator(stage)); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -349,10 +350,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } } for (size_t index = 0; index < info.stores_generics.size(); ++index) { - // TODO: Properly resolve attribute issues - if (info.stores_generics[index] || StageInitializesVaryings()) { - DefineGenericOutput(index, program.invocations); + if (!info.stores_generics[index]) { + continue; } + DefineGenericOutput(index, program.invocations); } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); @@ -362,17 +363,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineConstants(); } -bool EmitContext::StageInitializesVaryings() const noexcept { - switch (stage) { - case Stage::VertexA: - case Stage::VertexB: - case Stage::Geometry: - return true; - default: - return false; - } -} - void EmitContext::SetupExtensions() { if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 4a50556e1..8fa87c02c 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -136,8 +136,6 @@ public: code += '\n'; } - [[nodiscard]] bool StageInitializesVaryings() const noexcept; - std::string header; std::string code; VarAlloc var_alloc; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index a241d18fe..663ff3753 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLSL { namespace { @@ -179,6 +180,10 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; + if (!ctx.runtime_info.previous_stage_stores_generic[index]) { + ctx.AddF32("{}=0.f;", inst, attr); + return; + } ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); return; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index f8e8aaa67..1a2d3dcea 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -12,11 +12,12 @@ namespace Shader::Backend::GLSL { namespace { -void InitializeVaryings(EmitContext& ctx) { - ctx.Add("gl_Position=vec4(0,0,0,1);"); - // TODO: Properly resolve attribute issues - for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { - if (!ctx.info.stores_generics[index]) { +void InitializeOutputVaryings(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + } + for (size_t index = 0; index < 16; ++index) { + if (ctx.info.stores_generics[index]) { ctx.Add("out_attr{}=vec4(0,0,0,1);", index); } } @@ -56,9 +57,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& } void EmitPrologue(EmitContext& ctx) { - if (ctx.StageInitializesVaryings()) { - InitializeVaryings(ctx); - } + InitializeOutputVaryings(ctx); + if (ctx.stage == Stage::Fragment && ctx.profile.need_declared_frag_colors) { for (size_t index = 0; index < ctx.info.stores_frag_color.size(); ++index) { if (ctx.info.stores_frag_color[index]) { @@ -73,7 +73,7 @@ void EmitEpilogue(EmitContext&) {} void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); - InitializeVaryings(ctx); + InitializeOutputVaryings(ctx); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 007b79650..612d087ad 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1209,6 +1209,9 @@ void EmitContext::DefineInputs(const Info& info) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } for (size_t index = 0; index < info.input_generics.size(); ++index) { + if (!runtime_info.previous_stage_stores_generic[index]) { + continue; + } const InputVarying generic{info.input_generics[index]}; if (!generic.used) { continue; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 42fff74e3..4ac1fbae5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -286,7 +286,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; - if (!type) { + if (!type || !ctx.runtime_info.previous_stage_stores_generic[index]) { // Attribute is disabled return ctx.Const(0.0f); } diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 5250509c1..ed8729fca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -192,7 +192,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); for (size_t index = 0; index < 32; ++index) { result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + if (vertex_b.info.stores_generics[index]) { + result.info.stores_generics[index] = true; + } } Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 47933df97..bab32b58b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -79,7 +79,7 @@ void GetAttribute(Info& info, IR::Attribute attr) { void SetAttribute(Info& info, IR::Attribute attr) { if (IR::IsGeneric(attr)) { - info.stores_generics.at(IR::GenericAttributeIndex(attr)) = true; + info.stores_generics[IR::GenericAttributeIndex(attr)] = true; return; } if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { @@ -956,7 +956,9 @@ void GatherInfoFromHeader(Environment& env, Info& info) { } if (info.stores_indexed_attributes) { for (size_t i = 0; i < info.stores_generics.size(); i++) { - info.stores_generics[i] |= header.vtg.IsOutputGenericVectorActive(i); + if (header.vtg.IsOutputGenericVectorActive(i)) { + info.stores_generics[i] = true; + } } info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; info.stores_position |= header.vtg.omap_systemb.position != 0; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index d4b047b4d..63fe2afaf 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include @@ -59,6 +60,8 @@ struct TransformFeedbackVarying { struct RuntimeInfo { std::array generic_input_types{}; + std::bitset<32> previous_stage_stores_generic{}; + bool convert_depth_mode{}; bool force_early_z{}; @@ -72,11 +75,12 @@ struct RuntimeInfo { std::optional alpha_test_func; float alpha_test_reference{}; - // Static y negate value + /// Static Y negate value bool y_negate{}; - // Use storage buffers instead of global pointers on GLASM + /// Use storage buffers instead of global pointers on GLASM bool glasm_use_storage_buffers{}; + /// Transform feedback state for each varying std::vector xfb_varyings; }; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index e9ebc16a4..a20e15d2e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -140,7 +140,7 @@ struct Info { bool stores_sample_mask{}; bool stores_frag_depth{}; - std::array stores_generics{}; + std::bitset<32> stores_generics{}; bool stores_layer{}; bool stores_viewport_index{}; bool stores_point_size{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b459397f5..b8b24dd3d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -58,8 +58,15 @@ auto MakeSpan(Container& container) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, + const Shader::IR::Program* previous_program, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; + if (previous_program) { + info.previous_stage_stores_generic = previous_program->info.stores_generics; + } else { + // Mark all stores as available + info.previous_stage_stores_generic.flip(); + } switch (program.stage) { case Shader::Stage::VertexB: case Shader::Stage::Geometry: @@ -400,6 +407,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array sources; Shader::Backend::Bindings binding; + Shader::IR::Program* previous_program{}; const bool use_glasm{device.UseAssemblyShaders()}; const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { @@ -413,12 +421,13 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( infos[stage_index] = &program.info; const auto runtime_info{ - MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; + MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } + previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 72e6f4207..dc028306a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -90,7 +90,7 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso return {}; } -static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { +Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { if (attr.enabled == 0) { return Shader::AttributeType::Disabled; } @@ -124,9 +124,15 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde } Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program) { + const Shader::IR::Program& program, + const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; - + if (previous_program) { + info.previous_stage_stores_generic = previous_program->info.stores_generics; + } else { + // Mark all stores as available + info.previous_stage_stores_generic.flip(); + } const Shader::Stage stage{program.stage}; const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; @@ -499,6 +505,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; + const Shader::IR::Program* previous_stage{}; Shader::Backend::Bindings binding; for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -511,7 +518,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); @@ -519,6 +526,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } + previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; -- cgit v1.2.3 From ca67077ca87772b4b4ac61d08f5b2c60616348e0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 21:14:57 -0300 Subject: vk_graphics_pipeline: Use VK_KHR_push_descriptor when available ~51% faster on Nvidia compared to previous method. --- src/video_core/renderer_vulkan/pipeline_helper.h | 32 ++++++++++++++++------ .../renderer_vulkan/vk_compute_pipeline.cpp | 8 ++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 28 ++++++++++++------- .../renderer_vulkan/vk_graphics_pipeline.h | 1 + src/video_core/vulkan_common/vulkan_device.cpp | 11 ++++++++ src/video_core/vulkan_common/vulkan_device.h | 12 ++++++++ src/video_core/vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 31 +++++++++++++-------- 8 files changed, 88 insertions(+), 36 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index c6e5e059b..4847db6b6 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -16,38 +16,50 @@ #include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" +#include "video_core/vulkan_common/vulkan_device.h" namespace Vulkan { class DescriptorLayoutBuilder { public: - DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} + DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} - vk::DescriptorSetLayout CreateDescriptorSetLayout() const { + bool CanUsePushDescriptor() const noexcept { + return device->IsKhrPushDescriptorSupported() && + num_descriptors <= device->MaxPushDescriptors(); + } + + vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const { if (bindings.empty()) { return nullptr; } - return device->CreateDescriptorSetLayout({ + const VkDescriptorSetLayoutCreateFlags flags = + use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; + return device->GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .pNext = nullptr, - .flags = 0, + .flags = flags, .bindingCount = static_cast(bindings.size()), .pBindings = bindings.data(), }); } vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, - VkPipelineLayout pipeline_layout) const { + VkPipelineLayout pipeline_layout, + bool use_push_descriptor) const { if (entries.empty()) { return nullptr; } - return device->CreateDescriptorUpdateTemplateKHR({ + const VkDescriptorUpdateTemplateType type = + use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR + : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; + return device->GetLogical().CreateDescriptorUpdateTemplateKHR({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, .pNext = nullptr, .flags = 0, .descriptorUpdateEntryCount = static_cast(entries.size()), .pDescriptorUpdateEntries = entries.data(), - .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .templateType = type, .descriptorSetLayout = descriptor_set_layout, .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, .pipelineLayout = pipeline_layout, @@ -56,7 +68,7 @@ public: } vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { - return device->CreatePipelineLayout({ + return device->GetLogical().CreatePipelineLayout({ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -97,14 +109,16 @@ private: .stride = sizeof(DescriptorUpdateEntry), }); ++binding; + num_descriptors += descriptors[i].count; offset += sizeof(DescriptorUpdateEntry); } } - const vk::Device* device{}; + const Device* device{}; boost::container::small_vector bindings; boost::container::small_vector entries; u32 binding{}; + u32 num_descriptors{}; size_t offset{}; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index cc855a62e..70b84c7a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -37,15 +37,14 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript uniform_buffer_sizes.begin()); auto func{[this, &descriptor_pool, shader_notify] { - DescriptorLayoutBuilder builder{device.GetLogical()}; + DescriptorLayoutBuilder builder{device}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); - descriptor_set_layout = builder.CreateDescriptorSetLayout(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(false); pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); descriptor_update_template = - builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); + builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, .pNext = nullptr, @@ -186,7 +185,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); - if (!descriptor_set_layout) { return; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e02b1b7ab..2b59a9d88 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -40,7 +40,7 @@ constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; constexpr size_t MAX_IMAGE_ELEMENTS = 64; DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span infos) { - DescriptorLayoutBuilder builder{device.GetLogical()}; + DescriptorLayoutBuilder builder{device}; for (size_t index = 0; index < infos.size(); ++index) { static constexpr std::array stages{ VK_SHADER_STAGE_VERTEX_BIT, @@ -229,12 +229,15 @@ GraphicsPipeline::GraphicsPipeline( } auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; - descriptor_set_layout = builder.CreateDescriptorSetLayout(); - descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); - + uses_push_descriptor = builder.CanUsePushDescriptor(); + descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); + if (!uses_push_descriptor) { + descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); + } const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; pipeline_layout = builder.CreatePipelineLayout(set_layout); - descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); + descriptor_update_template = + builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor); const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); @@ -462,11 +465,16 @@ void GraphicsPipeline::ConfigureDraw() { if (!descriptor_set_layout) { return; } - const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - const vk::Device& dev{device.GetLogical()}; - dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, - descriptor_set, nullptr); + if (uses_push_descriptor) { + cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout, + 0, descriptor_data); + } else { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + const vk::Device& dev{device.GetLogical()}; + dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, + descriptor_set, nullptr); + } }); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 40d1edabd..622267147 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -148,6 +148,7 @@ private: std::condition_variable build_condvar; std::mutex build_mutex; std::atomic_bool is_built{false}; + bool uses_push_descriptor{false}; }; } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0d8c6cd08..9d918de8d 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -767,6 +767,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); + test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); @@ -932,6 +933,16 @@ std::vector Device::LoadExtensions(bool requires_surface) { khr_workgroup_memory_explicit_layout = true; } } + if (khr_push_descriptor) { + VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; + push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; + push_descriptor.pNext = nullptr; + + physical_properties.pNext = &push_descriptor; + physical.GetProperties2KHR(physical_properties); + + max_push_descriptors = push_descriptor.maxPushDescriptors; + } return extensions; } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4fda472b0..49605752d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -154,6 +154,11 @@ public: return guest_warp_stages & stage; } + /// Returns the maximum number of push descriptors. + u32 MaxPushDescriptors() const { + return max_push_descriptors; + } + /// Returns true if formatless image load is supported. bool IsFormatlessImageLoadSupported() const { return is_formatless_image_load_supported; @@ -194,6 +199,11 @@ public: return khr_spirv_1_4; } + /// Returns true if the device supports VK_KHR_push_descriptor. + bool IsKhrPushDescriptorSupported() const { + return khr_push_descriptor; + } + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { return khr_workgroup_memory_explicit_layout; @@ -330,6 +340,7 @@ private: VkDriverIdKHR driver_id{}; ///< Driver ID. VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. u64 device_access_memory{}; ///< Total size of device local memory in bytes. + u32 max_push_descriptors{}; ///< Maximum number of push descriptors bool is_optimal_astc_supported{}; ///< Support for native ASTC. bool is_float16_supported{}; ///< Support for float16 arithmetics. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. @@ -345,6 +356,7 @@ private: bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. + bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 7e13ae8af..d7e9fac22 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdFillBuffer); X(vkCmdPipelineBarrier); X(vkCmdPushConstants); + X(vkCmdPushDescriptorSetWithTemplateKHR); X(vkCmdSetBlendConstants); X(vkCmdSetDepthBias); X(vkCmdSetDepthBounds); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 6e5be1186..d43b606f1 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; PFN_vkBindBufferMemory vkBindBufferMemory{}; PFN_vkBindImageMemory vkBindImageMemory{}; + PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBeginQuery vkCmdBeginQuery{}; PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; - PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{}; PFN_vkCmdBindPipeline vkCmdBindPipeline{}; PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; + PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; PFN_vkCmdBlitImage vkCmdBlitImage{}; PFN_vkCmdClearAttachments vkCmdClearAttachments{}; PFN_vkCmdCopyBuffer vkCmdCopyBuffer{}; @@ -211,35 +212,35 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdDispatch vkCmdDispatch{}; PFN_vkCmdDraw vkCmdDraw{}; PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; + PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdEndQuery vkCmdEndQuery{}; PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{}; - PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; PFN_vkCmdFillBuffer vkCmdFillBuffer{}; PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; PFN_vkCmdPushConstants vkCmdPushConstants{}; + PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{}; + PFN_vkCmdResolveImage vkCmdResolveImage{}; PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; + PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBias vkCmdSetDepthBias{}; PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{}; - PFN_vkCmdSetEvent vkCmdSetEvent{}; - PFN_vkCmdSetScissor vkCmdSetScissor{}; - PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; - PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; - PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; - PFN_vkCmdSetViewport vkCmdSetViewport{}; - PFN_vkCmdWaitEvents vkCmdWaitEvents{}; - PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; - PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{}; PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{}; PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{}; PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; + PFN_vkCmdSetEvent vkCmdSetEvent{}; PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; + PFN_vkCmdSetScissor vkCmdSetScissor{}; + PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; + PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; + PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; - PFN_vkCmdResolveImage vkCmdResolveImage{}; + PFN_vkCmdSetViewport vkCmdSetViewport{}; + PFN_vkCmdWaitEvents vkCmdWaitEvents{}; PFN_vkCreateBuffer vkCreateBuffer{}; PFN_vkCreateBufferView vkCreateBufferView{}; PFN_vkCreateCommandPool vkCreateCommandPool{}; @@ -990,6 +991,12 @@ public: dynamic_offsets.size(), dynamic_offsets.data()); } + void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template, + VkPipelineLayout layout, u32 set, + const void* data) const noexcept { + dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data); + } + void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { dld->vkCmdBindPipeline(handle, bind_point, pipeline); } -- cgit v1.2.3 From fcff19e0fa3d21130bc7b6cd50a10db102b5d4d7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 17 Jun 2021 23:12:41 -0400 Subject: shaders: Allow shader notify when async shaders is disabled --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 8 ++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 +++++------- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b8b24dd3d..8aaadccc4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -430,10 +430,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; - return std::make_unique( - device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - thread_worker, notify, sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, + maxwell3d, program_manager, state_tracker, + thread_worker, &shader_notify, sources, infos, + key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index dc028306a..e83628c13 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -529,11 +529,10 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; - return std::make_unique(maxwell3d, gpu_memory, scheduler, buffer_cache, - texture_cache, notify, device, descriptor_pool, - update_descriptor_queue, thread_worker, - render_pass_cache, key, std::move(modules), infos); + return std::make_unique( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, + descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, + std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -596,9 +595,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, - thread_worker, notify, program.info, + thread_worker, &shader_notify, program.info, std::move(spv_module)); } catch (const Shader::Exception& exception) { -- cgit v1.2.3 From 838d7e4ca59b79dc9a8dd727a12dfba00e73242c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 18 Jun 2021 03:22:00 -0300 Subject: buffer_cache: Fix size reductions not having in mind bind sizes A buffer binding can change between shaders without changing the shaders. This lead to outdated bindings on OpenGL. --- src/video_core/buffer_cache/buffer_cache.h | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d004199ba..a75de4384 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -405,8 +405,6 @@ private: u32 written_compute_texture_buffers = 0; u32 image_compute_texture_buffers = 0; - std::array fast_bound_uniform_buffers{}; - std::array uniform_cache_hits{}; std::array uniform_cache_shots{}; @@ -416,6 +414,10 @@ private: std::conditional_t, Empty> dirty_uniform_buffers{}; + std::conditional_t, Empty> fast_bound_uniform_buffers{}; + std::conditional_t, NUM_STAGES>, Empty> + uniform_buffer_binding_sizes{}; std::vector cached_write_buffer_ids; @@ -684,6 +686,7 @@ void BufferCache

::SetUniformBuffersState(const std::array& m fast_bound_uniform_buffers.fill(0); } dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } } enabled_uniform_buffer_masks = mask; @@ -1016,14 +1019,18 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && - !buffer.IsRegionGpuModified(cpu_addr, size); + !buffer.IsRegionGpuModified(cpu_addr, size) && false; if (use_fast_buffer) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia - if (!HasFastUniformBufferBound(stage, binding_index)) { + const bool should_fast_bind = + !HasFastUniformBufferBound(stage, binding_index) || + uniform_buffer_binding_sizes[stage][binding_index] != size; + if (should_fast_bind) { // We only have to bind when the currently bound buffer is not the fast version fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -1033,6 +1040,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } if constexpr (IS_OPENGL) { fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size); @@ -1046,9 +1054,13 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } ++uniform_cache_shots[0]; - if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { - // Skip binding if it's not needed and if the bound buffer is not the fast version - // This exists to avoid instances where the fast buffer is bound and a GPU write happens + // Skip binding if it's not needed and if the bound buffer is not the fast version + // This exists to avoid instances where the fast buffer is bound and a GPU write happens + needs_bind |= HasFastUniformBufferBound(stage, binding_index); + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; + } + if (!needs_bind) { return; } const u32 offset = buffer.Offset(cpu_addr); @@ -1060,6 +1072,9 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; } + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + uniform_buffer_binding_sizes[stage][binding_index] = size; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { @@ -1725,6 +1740,7 @@ template void BufferCache

::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } auto& flags = maxwell3d.dirty.flags; flags[Dirty::IndexBuffer] = true; -- cgit v1.2.3 From df9b7e18f5c5bab84cc8c38214a0e1e9e9506bd4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 19 Jun 2021 17:28:46 -0300 Subject: buffer_cache: Fix debugging leftover --- src/video_core/buffer_cache/buffer_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index a75de4384..cb9c69baf 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1019,7 +1019,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && - !buffer.IsRegionGpuModified(cpu_addr, size) && false; + !buffer.IsRegionGpuModified(cpu_addr, size); if (use_fast_buffer) { if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { -- cgit v1.2.3 From 218dedca1f8572bc0e43f8e7ea577f4ece28c4c2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:00:38 -0300 Subject: gl_graphics_pipeline: Port optimizations from Vulkan pipelines --- .../renderer_opengl/gl_graphics_pipeline.cpp | 180 ++++++++++++++------- .../renderer_opengl/gl_graphics_pipeline.h | 18 ++- 2 files changed, 141 insertions(+), 57 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 8d11fbc55..6b62fa1da 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -15,6 +15,12 @@ #include "video_core/shader_notify.h" #include "video_core/texture_cache/texture_cache.h" +#if defined(_MSC_VER) && defined(NDEBUG) +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif + namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; @@ -98,13 +104,76 @@ std::pair TransformFeedbackEnum(u8 location) { return {GL_POSITION, 0}; } -struct Spec { +template +bool Passes(const std::array& stage_infos, u32 enabled_mask) { + for (size_t stage = 0; stage < stage_infos.size(); ++stage) { + if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if (!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if (!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; +} + +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); + +template +ConfigureFuncPtr FindSpec(const std::array& stage_infos, u32 enabled_mask) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes(stage_infos, enabled_mask)) { + return FindSpec(stage_infos, enabled_mask); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc(); +} + +struct SimpleVertexFragmentSpec { + static constexpr std::array enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { static constexpr std::array enabled_stages{true, true, true, true, true}; static constexpr bool has_storage_buffers = true; static constexpr bool has_texture_buffers = true; static constexpr bool has_image_buffers = true; static constexpr bool has_images = true; }; + +ConfigureFuncPtr ConfigureFunc(const std::array& infos, u32 enabled_mask) { + return FindSpec(infos, enabled_mask); +} } // Anonymous namespace size_t GraphicsPipelineKey::Hash() const noexcept { @@ -129,8 +198,52 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (shader_notify) { shader_notify->MarkShaderBuilding(); } - std::ranges::transform(infos, stage_infos.begin(), - [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + auto& info{stage_infos[stage]}; + if (infos[stage]) { + info = *infos[stage]; + enabled_stages_mask |= 1u << stage; + } + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); + + if (assembly_shaders && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { if (!device.UseAssemblyShaders()) { program.handle = glCreateProgram(); @@ -142,7 +255,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } if (device.UseAssemblyShaders()) { assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; } else { AttachShader(Stage(stage), program.handle, code); } @@ -150,49 +262,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (!device.UseAssemblyShaders()) { LinkProgram(program.handle); } - u32 num_textures{}; - u32 num_images{}; - u32 num_storage_buffers{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += - AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += - AccumulateCount(info.storage_buffers_descriptors); - } - enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; - std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; - - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; - - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); - - writes_global_memory |= std::ranges::any_of( - info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); - - const bool assembly_shaders{assembly_programs[0].handle != 0}; - use_storage_buffers = - !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - writes_global_memory &= !use_storage_buffers; - - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); - } if (shader_notify) { shader_notify->MarkShaderComplete(); } @@ -205,7 +274,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } } -void GraphicsPipeline::Configure(bool is_indexed) { +template +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; std::array samplers; @@ -221,7 +291,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - const auto config_stage{[&](size_t stage) { + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { @@ -311,7 +381,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); ImageId* texture_buffer_index{image_view_ids.data()}; - const auto bind_stage_info{[&](size_t stage) { + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { constexpr bool is_image = std::is_same_v; @@ -430,6 +500,11 @@ void GraphicsPipeline::Configure(bool is_indexed) { } } +void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + void GraphicsPipeline::GenerateTransformFeedbackState( const VideoCommon::TransformFeedbackState& xfb_state) { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal @@ -475,9 +550,4 @@ void GraphicsPipeline::GenerateTransformFeedbackState( num_xfb_strides = static_cast(current_stream - xfb_streams.data()); } -void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { - glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, - xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 58deafd3c..a3546daa8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -75,7 +75,9 @@ public: const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); - void Configure(bool is_indexed); + void Configure(bool is_indexed) { + configure_func(this, is_indexed); + } void ConfigureTransformFeedback() const { if (num_xfb_attribs != 0) { @@ -91,11 +93,21 @@ public: return is_built.load(std::memory_order::relaxed); } + template + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pipeline, bool is_indexed) { + pipeline->ConfigureImpl(is_indexed); + }; + } + private: - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + template + void ConfigureImpl(bool is_indexed); void ConfigureTransformFeedbackImpl() const; + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -103,6 +115,8 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; + void (*configure_func)(GraphicsPipeline*, bool){}; + OGLProgram program; std::array assembly_programs; u32 enabled_stages_mask{}; -- cgit v1.2.3 From f5db8c74405c93b52efbdef318790bd9ec4661c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:23:50 -0300 Subject: gl_shader_cache: Check previous pipeline before checking hash map Port optimization from Vulkan. --- .../renderer_opengl/gl_graphics_pipeline.cpp | 33 ++++++++++------------ .../renderer_opengl/gl_graphics_pipeline.h | 9 ++++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 20 +++++++++---- src/video_core/renderer_opengl/gl_shader_cache.h | 5 +++- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 3 +- 5 files changed, 41 insertions(+), 29 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 6b62fa1da..92974ba08 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -184,17 +184,15 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc return std::memcmp(this, &rhs, Size()) == 0; } -GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_, - BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - ShaderWorker* thread_worker, - VideoCore::ShaderNotify* shader_notify, - std::array sources, - const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { +GraphicsPipeline::GraphicsPipeline( + const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, std::array sources, + const std::array& infos, const GraphicsPipelineKey& key_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, key{key_} { if (shader_notify) { shader_notify->MarkShaderBuilding(); } @@ -241,10 +239,10 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c writes_global_memory &= !use_storage_buffers; configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); + if (assembly_shaders && key.xfb_enabled) { + GenerateTransformFeedbackState(); } - auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { + auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { if (!device.UseAssemblyShaders()) { program.handle = glCreateProgram(); } @@ -505,15 +503,14 @@ void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); } -void GraphicsPipeline::GenerateTransformFeedbackState( - const VideoCommon::TransformFeedbackState& xfb_state) { +void GraphicsPipeline::GenerateTransformFeedbackState() { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. GLint* cursor{xfb_attribs.data()}; GLint* current_stream{xfb_streams.data()}; for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = xfb_state.layouts[feedback]; + const auto& layout = key.xfb_state.layouts[feedback]; UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); if (layout.varying_count == 0) { continue; @@ -528,7 +525,7 @@ void GraphicsPipeline::GenerateTransformFeedbackState( } ++current_stream; - const auto& locations = xfb_state.varyings[feedback]; + const auto& locations = key.xfb_state.varyings[feedback]; std::optional current_index; for (u32 offset = 0; offset < layout.varying_count; ++offset) { const u8 location = locations[offset]; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index a3546daa8..a033d4a95 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -73,7 +73,7 @@ public: ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state); + const GraphicsPipelineKey& key_); void Configure(bool is_indexed) { configure_func(this, is_indexed); @@ -85,6 +85,10 @@ public: } } + [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept { + return key; + } + [[nodiscard]] bool WritesGlobalMemory() const noexcept { return writes_global_memory; } @@ -106,7 +110,7 @@ private: void ConfigureTransformFeedbackImpl() const; - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + void GenerateTransformFeedbackState(); TextureCache& texture_cache; BufferCache& buffer_cache; @@ -114,6 +118,7 @@ private: Tegra::Engines::Maxwell3D& maxwell3d; ProgramManager& program_manager; StateTracker& state_tracker; + const GraphicsPipelineKey key; void (*configure_func)(GraphicsPipeline*, bool){}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 8aaadccc4..c36b0d8cf 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -298,6 +298,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { + current_pipeline = nullptr; return nullptr; } const auto& regs{maxwell3d.regs}; @@ -313,15 +314,23 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (graphics_key.xfb_enabled) { SetXfbState(graphics_key.xfb_state, regs); } + if (current_pipeline && graphics_key == current_pipeline->Key()) { + return current_pipeline->IsBuilt() ? current_pipeline : nullptr; + } + return CurrentGraphicsPipelineSlowPath(); +} + +GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; - auto& program{pair->second}; + auto& pipeline{pair->second}; if (is_new) { - program = CreateGraphicsPipeline(); + pipeline = CreateGraphicsPipeline(); } - if (!program || !program->IsBuilt()) { + current_pipeline = pipeline.get(); + if (!pipeline || !pipeline->IsBuilt()) { return nullptr; } - return program.get(); + return pipeline.get(); } ComputePipeline* ShaderCache::CurrentComputePipeline() { @@ -432,8 +441,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - thread_worker, &shader_notify, sources, infos, - key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + thread_worker, &shader_notify, sources, infos, key); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index ff5707119..16873fcec 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -53,6 +53,8 @@ public: [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: + GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -75,9 +77,10 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; VideoCore::ShaderNotify& shader_notify; + const bool use_asynchronous_shaders; GraphicsPipelineKey graphics_key{}; - const bool use_asynchronous_shaders; + GraphicsPipeline* current_pipeline{}; ShaderContext::ShaderPools main_pools; std::unordered_map> graphics_cache; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 42da2960b..efe5a7ed8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -146,12 +146,11 @@ private: BufferCache& buffer_cache; TextureCache& texture_cache; VideoCore::ShaderNotify& shader_notify; + bool use_asynchronous_shaders{}; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; - bool use_asynchronous_shaders{}; - std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3 From 9bd05313849f76fc64406d5ebf3aadf39fa3bfde Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:35:30 -0300 Subject: gl_graphics_pipeline: Inline hash and operator== key functions --- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 10 ---------- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 10 ++++++++-- 2 files changed, 8 insertions(+), 12 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 92974ba08..ad61a17a5 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -3,9 +3,7 @@ // Refer to the license.txt file included. #include -#include -#include "common/cityhash.h" #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" @@ -176,14 +174,6 @@ ConfigureFuncPtr ConfigureFunc(const std::array& infos, u32 ena } } // Anonymous namespace -size_t GraphicsPipelineKey::Hash() const noexcept { - return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); -} - -bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexcept { - return std::memcmp(this, &rhs, Size()) == 0; -} - GraphicsPipeline::GraphicsPipeline( const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index a033d4a95..f82d712f8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -5,10 +5,12 @@ #pragma once #include +#include #include #include #include "common/bit_field.h" +#include "common/cityhash.h" #include "common/common_types.h" #include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" @@ -44,9 +46,13 @@ struct GraphicsPipelineKey { std::array padding; VideoCommon::TransformFeedbackState xfb_state; - size_t Hash() const noexcept; + size_t Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); + } - bool operator==(const GraphicsPipelineKey&) const noexcept; + bool operator==(const GraphicsPipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; + } bool operator!=(const GraphicsPipelineKey& rhs) const noexcept { return !operator==(rhs); -- cgit v1.2.3 From 3877918e9657bcde160080aecc1821cf8cb50ea4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 17:09:50 -0300 Subject: gl_graphics_pipeline: Fix assembly shaders check for transform feedbacks --- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index ad61a17a5..a93b03cf7 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -229,7 +229,7 @@ GraphicsPipeline::GraphicsPipeline( writes_global_memory &= !use_storage_buffers; configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); - if (assembly_shaders && key.xfb_enabled) { + if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { -- cgit v1.2.3 From fb166b5ff4b42279b2c63c69f5b5a35feafa259e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:39:21 -0300 Subject: shader: Emulate 64-bit integers when not supported Useful for mobile and Intel Xe devices. --- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 3 +++ src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 5 +++++ 6 files changed, 16 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e728b43cc..c084f3400 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -154,6 +154,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Wed, 23 Jun 2021 02:41:00 -0300 Subject: shader: Unify shader stage types --- src/shader_recompiler/stage.h | 11 +++++++++-- src/video_core/engines/kepler_compute.cpp | 1 - src/video_core/engines/maxwell_3d.cpp | 1 - src/video_core/engines/maxwell_3d.h | 1 - src/video_core/engines/shader_type.h | 21 --------------------- src/video_core/renderer_opengl/gl_device.cpp | 18 ++++++++++-------- src/video_core/renderer_opengl/gl_device.h | 11 ++++++----- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 -- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - src/video_core/renderer_opengl/gl_shader_cache.h | 1 - src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 15 ++++++++------- src/video_core/renderer_vulkan/maxwell_to_vk.h | 3 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 -- src/video_core/shader_environment.cpp | 2 +- 15 files changed, 37 insertions(+), 55 deletions(-) delete mode 100644 src/video_core/engines/shader_type.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h index 7d4f2c0bb..5c1c8d8fc 100644 --- a/src/shader_recompiler/stage.h +++ b/src/shader_recompiler/stage.h @@ -9,13 +9,20 @@ namespace Shader { enum class Stage : u32 { - Compute, - VertexA, VertexB, TessellationControl, TessellationEval, Geometry, Fragment, + + Compute, + + VertexA, }; +constexpr u32 MaxStageTypes = 6; + +[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { + return static_cast(static_cast(Stage::VertexB) + index); +} } // namespace Shader diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index cae93c470..492b4c5a3 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 103a51fd0..b18b8a02a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 04d5790f6..fc2c36c6b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -20,7 +20,6 @@ #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/macro/macro.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h deleted file mode 100644 index 49ce5cde5..000000000 --- a/src/video_core/engines/shader_type.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" - -namespace Tegra::Engines { - -enum class ShaderType : u32 { - Vertex = 0, - TesselationControl = 1, - TesselationEval = 2, - Geometry = 3, - Fragment = 4, - Compute = 5, -}; -static constexpr std::size_t MaxShaderTypes = 6; - -} // namespace Tegra::Engines diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b1b5ba1ab..27be347e6 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -17,6 +17,7 @@ #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "shader_recompiler/stage.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -59,16 +60,18 @@ bool HasExtension(std::span extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -std::array BuildMaxUniformBuffers() noexcept { - std::array max; - std::ranges::transform(LIMIT_UBOS, max.begin(), - [](GLenum pname) { return GetInteger(pname); }); +std::array BuildMaxUniformBuffers() noexcept { + std::array max; + std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger); return max; } bool IsASTCSupported() { - static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; - static constexpr std::array formats = { + static constexpr std::array targets{ + GL_TEXTURE_2D, + GL_TEXTURE_2D_ARRAY, + }; + static constexpr std::array formats{ GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, @@ -84,11 +87,10 @@ bool IsASTCSupported() { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, }; - static constexpr std::array required_support = { + static constexpr std::array required_support{ GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, }; - for (const GLenum target : targets) { for (const GLenum format : formats) { for (const GLenum support : required_support) { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 0bd277d38..ad7b01b06 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -6,7 +6,7 @@ #include #include "common/common_types.h" -#include "video_core/engines/shader_type.h" +#include "shader_recompiler/stage.h" namespace OpenGL { @@ -16,8 +16,8 @@ public: [[nodiscard]] std::string GetVendorName() const; - u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { - return max_uniform_buffers[static_cast(shader_type)]; + u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept { + return max_uniform_buffers[static_cast(stage)]; } size_t GetUniformBufferAlignment() const { @@ -148,8 +148,7 @@ private: static bool TestVariableAoffi(); static bool TestPreciseBug(); - std::string vendor_name; - std::array max_uniform_buffers{}; + std::array max_uniform_buffers{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; @@ -181,6 +180,8 @@ private: bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; + + std::string vendor_name; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e3d336f86..0f0d780b5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -23,7 +23,6 @@ #include "core/memory.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_query_cache.h" @@ -40,7 +39,6 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; using GLvec4 = std::array; -using Tegra::Engines::ShaderType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f2f18b18a..5af9b7745 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -26,7 +26,6 @@ #include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 16873fcec..9d5306293 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -17,7 +17,6 @@ #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" -#include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_context.h" diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 8f0b0b8ec..8f9b9a11a 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { +VkShaderStageFlagBits ShaderStage(Shader::Stage stage) { switch (stage) { - case Tegra::Engines::ShaderType::Vertex: + case Shader::Stage::VertexA: + case Shader::Stage::VertexB: return VK_SHADER_STAGE_VERTEX_BIT; - case Tegra::Engines::ShaderType::TesselationControl: + case Shader::Stage::TessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - case Tegra::Engines::ShaderType::TesselationEval: + case Shader::Stage::TessellationEval: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - case Tegra::Engines::ShaderType::Geometry: + case Shader::Stage::Geometry: return VK_SHADER_STAGE_GEOMETRY_BIT; - case Tegra::Engines::ShaderType::Fragment: + case Shader::Stage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; - case Tegra::Engines::ShaderType::Compute: + case Shader::Stage::Compute: return VK_SHADER_STAGE_COMPUTE_BIT; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 50a599c11..8a9616039 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/stage.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" @@ -45,7 +46,7 @@ struct FormatInfo { [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, PixelFormat pixel_format); -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); +VkShaderStageFlagBits ShaderStage(Shader::Stage stage); VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2b59a9d88..9eb353a88 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -737,7 +737,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)), .module = *spv_modules[stage], .pName = "main", .pSpecializationInfo = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c57e16c50..f04c3394c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -58,8 +58,6 @@ struct DrawParams { bool is_indexed; }; -constexpr auto COMPUTE_SHADER_INDEX = static_cast(Tegra::Engines::ShaderType::Compute); - VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 6243cd176..d463e2b56 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 3; +constexpr u32 CACHE_VERSION = 4; constexpr size_t INST_SIZE = sizeof(u64); -- cgit v1.2.3 From 4f052a1f393d45843eabc237e21757be15f20062 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 03:32:41 -0300 Subject: vk_graphics_pipeline: Implement conservative rendering --- src/video_core/engines/maxwell_3d.h | 7 ++++- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 33 ++++++++++++++++------ src/video_core/vulkan_common/vulkan_device.cpp | 6 ++++ src/video_core/vulkan_common/vulkan_device.h | 6 ++++ 6 files changed, 44 insertions(+), 10 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index fc2c36c6b..da2ded671 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -910,7 +910,11 @@ public: u32 fill_rectangle; - INSERT_PADDING_WORDS_NOINIT(0x8); + INSERT_PADDING_WORDS_NOINIT(0x2); + + u32 conservative_raster_enable; + + INSERT_PADDING_WORDS_NOINIT(0x5); std::array vertex_attrib_format; @@ -1615,6 +1619,7 @@ ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(render_area, 0x3FD); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); +ASSERT_REG_POSITION(conservative_raster_enable, 0x452); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); ASSERT_REG_POSITION(multisample_sample_locations, 0x478); ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 16cef8711..7563dc462 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -87,6 +87,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, depth_format.Assign(static_cast(regs.zeta.format)); y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); + conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 04f34eb97..66b57b636 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -194,6 +194,7 @@ struct FixedPipelineState { BitField<6, 5, u32> depth_format; BitField<11, 1, u32> y_negate; BitField<12, 1, u32> provoking_vertex_last; + BitField<13, 1, u32> conservative_raster_enable; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9eb353a88..70e183e65 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -599,16 +599,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pScissors = nullptr, }; - const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, - .pNext = nullptr, - .provokingVertexMode = key.state.provoking_vertex_last != 0 - ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT - : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, - }; - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ + VkPipelineRasterizationStateCreateInfo rasterization_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = device.IsExtProvokingVertexSupported() ? &provoking_vertex : nullptr, + .pNext = nullptr, .flags = 0, .depthClampEnable = static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), @@ -625,6 +618,28 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .conservativeRasterizationMode = key.state.conservative_raster_enable != 0 + ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT + : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, + .extraPrimitiveOverestimationSize = 0.0f, + }; + VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .provokingVertexMode = key.state.provoking_vertex_last != 0 + ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT + : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, + }; + if (device.IsExtConservativeRasterizationSupported()) { + conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); + } + if (device.IsExtProvokingVertexSupported()) { + provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex); + } const VkPipelineMultisampleStateCreateInfo multisample_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9d918de8d..7b184d2f8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -412,6 +412,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + if (!ext_conservative_rasterization) { + LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); + } + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex; if (ext_provoking_vertex) { provoking_vertex = { @@ -776,6 +780,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { true); test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true); test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true); + test(ext_conservative_rasterization, VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME, + true); test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 40d00a52f..a9c0a0e4d 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -264,6 +264,11 @@ public: return ext_shader_stencil_export; } + /// Returns true if the device supports VK_EXT_conservative_rasterization. + bool IsExtConservativeRasterizationSupported() const { + return ext_conservative_rasterization; + } + /// Returns true if the device supports VK_EXT_provoking_vertex. bool IsExtProvokingVertexSupported() const { return ext_provoking_vertex; @@ -374,6 +379,7 @@ private: bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. + bool ext_conservative_rasterization{}; ///< Support for VK_EXT_conservative_rasterization. bool ext_provoking_vertex{}; ///< Support for VK_EXT_provoking_vertex. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool has_renderdoc{}; ///< Has RenderDoc attached -- cgit v1.2.3 From 7dafa96ab59892b7f1fbffdb61e4326e6443955f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 02:41:09 -0300 Subject: shader: Rework varyings and implement passthrough geometry shaders Put all varyings into a single std::bitset with helpers to access it. Implement passthrough geometry shaders using host's. --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/glasm/emit_context.cpp | 15 +- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 6 +- .../backend/glasm/emit_glasm_context_get_set.cpp | 6 +- .../backend/glsl/emit_context.cpp | 58 +++--- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 2 +- .../backend/glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_special.cpp | 4 +- .../backend/spirv/emit_context.cpp | 97 +++++----- src/shader_recompiler/backend/spirv/emit_context.h | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 19 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 2 +- src/shader_recompiler/environment.h | 5 + src/shader_recompiler/frontend/ir/attribute.h | 6 + src/shader_recompiler/frontend/ir/program.h | 1 + .../frontend/maxwell/translate_program.cpp | 18 +- .../ir_opt/collect_shader_info_pass.cpp | 202 +++++++-------------- src/shader_recompiler/profile.h | 1 + src/shader_recompiler/program_header.h | 62 +++---- src/shader_recompiler/runtime_info.h | 3 +- src/shader_recompiler/shader_info.h | 37 +--- src/shader_recompiler/varying_state.h | 69 +++++++ src/video_core/engines/maxwell_3d.h | 7 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 6 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 16 +- src/video_core/shader_environment.cpp | 10 +- src/video_core/vulkan_common/vulkan_device.cpp | 6 + src/video_core/vulkan_common/vulkan_device.h | 6 + 29 files changed, 345 insertions(+), 331 deletions(-) create mode 100644 src/shader_recompiler/varying_state.h (limited to 'src/video_core') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 3b5708cb9..b5b7e5e83 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -229,6 +229,7 @@ add_library(shader_recompiler STATIC program_header.h runtime_info.h shader_info.h + varying_state.h ) target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 21e14867c..80dad9ff3 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -83,14 +83,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (generic.used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.loads.Generic(index)) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", - InterpDecorator(generic.interpolation), index, attr_stage, index, index); + InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); } } - if (IsInputArray(stage) && info.loads_position) { + if (IsInputArray(stage) && info.loads.AnyComponent(IR::Attribute::PositionX)) { Add("ATTRIB vertex_position=vertex.position;"); } if (info.uses_invocation_id) { @@ -102,7 +101,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (info.stores_tess_level_inner) { Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { @@ -124,8 +123,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 79314f130..2b96977b3 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -296,8 +296,10 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (info.uses_sparse_residency) { header += "OPTION EXT_sparse_texture2;"; } - if (((info.stores_viewport_index || info.stores_layer) && stage != Stage::Geometry) || - info.stores_viewport_mask) { + const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || + info.stores[IR::Attribute::Layer]}; + if ((stage != Stage::Geometry && stores_viewport_layer) || + info.stores[IR::Attribute::ViewportMask]) { if (profile.support_viewport_index_layer_non_geometry) { header += "OPTION NV_viewport_array2;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index bc195d248..02c9dc6d7 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -261,7 +261,7 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, fmt::format("{}.z", value), fmt::format("{}.w", value)}; read(compare_index, values); }}; - if (ctx.info.loads_position) { + if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { const u32 index{static_cast(IR::Attribute::PositionX)}; if (IsInputArray(ctx.stage)) { read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); @@ -269,8 +269,8 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); } } - for (u32 index = 0; index < ctx.info.input_generics.size(); ++index) { - if (!ctx.info.input_generics[index].used) { + for (u32 index = 0; index < static_cast(IR::NUM_GENERICS); ++index) { + if (!ctx.info.loads.Generic(index)) { continue; } read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 14c009535..0d7f7bc3b 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -212,22 +212,22 @@ std::string_view OutputPrimitive(OutputTopology topology) { } void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { - if (!ctx.info.stores_legacy_varyings) { + if (!ctx.info.stores.Legacy()) { return; } - if (ctx.info.stores_fixed_fnc_textures) { + if (ctx.info.stores.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.stores_color_front_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_FrontColor;"; } - if (ctx.info.stores_color_front_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { header += "vec4 gl_FrontSecondaryColor;"; } - if (ctx.info.stores_color_back_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { header += "vec4 gl_BackColor;"; } - if (ctx.info.stores_color_back_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { header += "vec4 gl_BackSecondaryColor;"; } } @@ -237,32 +237,32 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { return; } header += "out gl_PerVertex{vec4 gl_Position;"; - if (ctx.info.stores_point_size) { + if (ctx.info.stores[IR::Attribute::PointSize]) { header += "float gl_PointSize;"; } - if (ctx.info.stores_clip_distance) { + if (ctx.info.stores.ClipDistances()) { header += "float gl_ClipDistance[];"; } - if (ctx.info.stores_viewport_index && ctx.profile.support_viewport_index_layer_non_geometry && - ctx.stage != Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && + ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } SetupLegacyOutPerVertex(ctx, header); header += "};"; - if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { - if (!ctx.info.loads_legacy_varyings) { + if (!ctx.info.loads.Legacy()) { return; } header += "in gl_PerFragment{"; - if (ctx.info.loads_fixed_fnc_textures) { + if (ctx.info.loads.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.loads_color_front_diffuse) { + if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_Color;"; } header += "};"; @@ -325,14 +325,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile SetupOutPerVertex(*this, header); SetupLegacyInPerFragment(*this, header); - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (!generic.used || !runtime_info.previous_stage_stores_generic[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { continue; } - header += - fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, - InterpDecorator(generic.interpolation), index, InputArrayDecorator(stage)); + header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, + InterpDecorator(info.interpolation[index]), index, + InputArrayDecorator(stage)); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -349,11 +348,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (!info.stores_generics[index]) { - continue; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(index, program.invocations); } - DefineGenericOutput(index, program.invocations); } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); @@ -398,14 +396,14 @@ void EmitContext::SetupExtensions() { header += "#extension GL_NV_shader_thread_shuffle : enable\n"; } } - if ((info.stores_viewport_index || info.stores_layer) && + if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { header += "#extension GL_ARB_sparse_texture2 : enable\n"; } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { header += "#extension GL_NV_viewport_array2 : enable\n"; } if (info.uses_typeless_image_reads) { @@ -535,20 +533,20 @@ void EmitContext::DefineHelperFunctions() { fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " "masked_index=uint(base_index)&3u;switch(base_index>>2){{", vertex_arg)}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { const auto position_idx{is_array ? "gl_in[vertex]." : ""}; func += fmt::format("case {}:return {}{}[masked_index];", static_cast(IR::Attribute::PositionX) >> 2, position_idx, position_name); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } const auto vertex_idx{is_array ? "[vertex]" : ""}; func += fmt::format("case {}:return in_attr{}{}[masked_index];", - base_attribute_value + i, i, vertex_idx); + base_attribute_value + index, index, vertex_idx); } func += "default: return 0.0;}}"; header += func; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 32c4f1da2..8deaf5760 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -171,7 +171,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } std::string GlslVersionSpecifier(const EmitContext& ctx) { - if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings || ctx.info.loads_legacy_varyings) { + if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) { return " compatibility"; } return ""; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 3d2ba2eee..16e2a8502 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -179,7 +179,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - if (!ctx.runtime_info.previous_stage_stores_generic[index]) { + if (!ctx.runtime_info.previous_stage_stores.Generic(index)) { ctx.AddF32("{}=0.f;", inst, attr); return; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 6420aaa21..298881c7b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -20,8 +20,8 @@ void InitializeOutputVaryings(EmitContext& ctx) { if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { ctx.Add("gl_Position=vec4(0,0,0,1);"); } - for (size_t index = 0; index < ctx.info.stores_generics.size(); ++index) { - if (!ctx.info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!ctx.info.stores.Generic(index)) { continue; } const auto& info_array{ctx.output_generics.at(index)}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4c6501129..af4fb0c69 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -557,7 +557,7 @@ void EmitContext::DefineCommonConstants() { } void EmitContext::DefineInterfaces(const IR::Program& program) { - DefineInputs(program.info); + DefineInputs(program); DefineOutputs(program); } @@ -693,16 +693,16 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < static_cast(IR::NUM_GENERICS); ++index) { + if (!info.loads.Generic(index)) { continue; } - literals.push_back(base_attribute_value + i); + literals.push_back(base_attribute_value + index); labels.push_back(OpLabel()); } OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); @@ -710,7 +710,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturnValue(Const(0.0f)); size_t label_index{0}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{is_array ? OpAccessChain(input_f32, input_position, vertex, masked_index) @@ -719,18 +719,18 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpReturnValue(result); ++label_index; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - if (!info.input_generics[i].used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } AddLabel(labels[label_index]); - const auto type{AttrTypes(*this, static_cast(i))}; + const auto type{AttrTypes(*this, static_cast(index))}; if (!type) { OpReturnValue(Const(0.0f)); ++label_index; continue; } - const Id generic_id{input_generics.at(i)}; + const Id generic_id{input_generics.at(index)}; const Id pointer{is_array ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) : OpAccessChain(type->pointer, generic_id, masked_index)}; @@ -758,19 +758,19 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - literals.push_back(base_attribute_value + static_cast(i)); + literals.push_back(base_attribute_value + static_cast(index)); labels.push_back(OpLabel()); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { literals.push_back(static_cast(IR::Attribute::ClipDistance0) >> 2); labels.push_back(OpLabel()); literals.push_back(static_cast(IR::Attribute::ClipDistance4) >> 2); @@ -781,28 +781,28 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturn(); size_t label_index{0}; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - for (size_t i = 0; i < info.stores_generics.size(); ++i) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - if (output_generics[i][0].num_components != 4) { + if (output_generics[index][0].num_components != 4) { throw NotImplementedException("Physical stores and transform feedbacks"); } AddLabel(labels[label_index]); - const Id generic_id{output_generics[i][0].id}; + const Id generic_id{output_generics[index][0].id}; const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; OpStore(pointer, store_value); @@ -1146,7 +1146,10 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { } } -void EmitContext::DefineInputs(const Info& info) { +void EmitContext::DefineInputs(const IR::Program& program) { + const Info& info{program.info}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); } @@ -1183,15 +1186,20 @@ void EmitContext::DefineInputs(const Info& info) { fswzadd_lut_b = ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); } - if (info.loads_primitive_id) { + if (loads[IR::Attribute::PrimitiveId]) { primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); } - if (info.loads_position) { + if (loads.AnyComponent(IR::Attribute::PositionX)) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; input_position = DefineInput(*this, F32[4], true, built_in); + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } + } } - if (info.loads_instance_id) { + if (loads[IR::Attribute::InstanceId]) { if (profile.support_vertex_instance_id) { instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); } else { @@ -1199,7 +1207,7 @@ void EmitContext::DefineInputs(const Info& info) { base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } } - if (info.loads_vertex_id) { + if (loads[IR::Attribute::VertexId]) { if (profile.support_vertex_instance_id) { vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); } else { @@ -1207,24 +1215,24 @@ void EmitContext::DefineInputs(const Info& info) { base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } } - if (info.loads_front_face) { + if (loads[IR::Attribute::FrontFace]) { front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); } - if (info.loads_point_coord) { + if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) { point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); } - if (info.loads_tess_coord) { + if (loads[IR::Attribute::TessellationEvaluationPointU] || + loads[IR::Attribute::TessellationEvaluationPointV]) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } - for (size_t index = 0; index < info.input_generics.size(); ++index) { - if (!runtime_info.previous_stage_stores_generic[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const AttributeType input_type{runtime_info.generic_input_types[index]}; + if (!runtime_info.previous_stage_stores.Generic(index)) { continue; } - const InputVarying generic{info.input_generics[index]}; - if (!generic.used) { + if (!loads.Generic(index)) { continue; } - const AttributeType input_type{runtime_info.generic_input_types[index]}; if (input_type == AttributeType::Disabled) { continue; } @@ -1234,10 +1242,13 @@ void EmitContext::DefineInputs(const Info& info) { Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; + if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { + Decorate(id, spv::Decoration::PassthroughNV); + } if (stage != Stage::Fragment) { continue; } - switch (generic.interpolation) { + switch (info.interpolation[index]) { case Interpolation::Smooth: // Default // Decorate(id, spv::Decoration::Smooth); @@ -1266,42 +1277,42 @@ void EmitContext::DefineInputs(const Info& info) { void EmitContext::DefineOutputs(const IR::Program& program) { const Info& info{program.info}; const std::optional invocations{program.invocations}; - if (info.stores_position || stage == Stage::VertexB) { + if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } - if (info.stores_point_size || runtime_info.fixed_state_point_size) { + if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } const Id type{TypeArray(F32[1], Const(8U))}; clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } - if (info.stores_layer && + if (info.stores[IR::Attribute::Layer] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing Layer in fragment stage"); } layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); } - if (info.stores_viewport_index && + if (info.stores[IR::Attribute::ViewportIndex] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ViewportIndex in fragment stage"); } viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, spv::BuiltIn::ViewportMaskNV); } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { DefineGenericOutput(*this, index, invocations); } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 527685fb8..e277bc358 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -300,7 +300,7 @@ private: void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); - void DefineInputs(const Info& info); + void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 278c262f8..ddb86d070 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -281,11 +281,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); break; } - if (program.info.stores_point_size) { + if (program.info.stores[IR::Attribute::PointSize]) { ctx.AddCapability(spv::Capability::GeometryPointSize); } ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); + if (program.is_geometry_passthrough) { + if (ctx.profile.support_geometry_shader_passthrough) { + ctx.AddExtension("SPV_NV_geometry_shader_passthrough"); + ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV); + } else { + LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used with no support"); + } + } break; case Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; @@ -377,20 +385,21 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } - if (info.stores_viewport_index) { + if (info.stores[IR::Attribute::ViewportIndex]) { ctx.AddCapability(spv::Capability::MultiViewport); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { ctx.AddExtension("SPV_NV_viewport_array2"); ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); } - if (info.stores_layer || info.stores_viewport_index) { + if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) { if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); } } - if (!profile.support_vertex_instance_id && (info.loads_instance_id || info.loads_vertex_id)) { + if (!profile.support_vertex_instance_id && + (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 85bd72389..77fbb2b2f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -298,7 +298,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; - if (!type || !ctx.runtime_info.previous_stage_stores_generic[index]) { + if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index)) { // Attribute is disabled return ctx.Const(0.0f); } diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 090bc1c08..8369d0d84 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -31,6 +31,10 @@ public: return sph; } + [[nodiscard]] const std::array& GpPassthroughMask() const noexcept { + return gp_passthrough_mask; + } + [[nodiscard]] Stage ShaderStage() const noexcept { return stage; } @@ -41,6 +45,7 @@ public: protected: ProgramHeader sph{}; + std::array gp_passthrough_mask{}; Stage stage{}; u32 start_address{}; }; diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 8bf2ddf30..ca1199494 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -222,6 +222,8 @@ enum class Attribute : u64 { FrontFace = 255, }; +constexpr size_t NUM_GENERICS = 32; + [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); @@ -230,6 +232,10 @@ enum class Attribute : u64 { [[nodiscard]] std::string NameOf(Attribute attribute); +[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept { + return static_cast(static_cast(attribute) + value); +} + } // namespace Shader::IR template <> diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 9ede5b48d..ebcaa8bc2 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -27,6 +27,7 @@ struct Program { u32 invocations{}; u32 local_memory_size{}; u32 shared_memory_size{}; + bool is_geometry_passthrough{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index a8b727f1a..6b4b0ce5b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -46,7 +46,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { return; } const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { std::optional imap; for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { if (value == PixelImap::Unused) { @@ -60,7 +60,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { if (!imap) { continue; } - program.info.input_generics[index].interpolation = [&] { + program.info.interpolation[index] = [&] { switch (*imap) { case PixelImap::Unused: case PixelImap::Perspective: @@ -140,6 +140,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + } break; } case Stage::Compute: @@ -194,12 +199,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - if (vertex_b.info.stores_generics[index]) { - result.info.stores_generics[index] = true; - } - } + result.info.loads.mask |= vertex_b.info.loads.mask; + result.info.stores.mask |= vertex_b.info.stores.mask; + Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DeadCodeEliminationPass(result); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a82472152..5e32ac784 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -29,130 +29,6 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { }); } -void GetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.input_generics.at(IR::GenericAttributeIndex(attr)).used = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.loads_fixed_fnc_textures = true; - info.loads_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::PrimitiveId: - info.loads_primitive_id = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.loads_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.loads_color_front_diffuse = true; - info.loads_legacy_varyings = true; - break; - case IR::Attribute::PointSpriteS: - case IR::Attribute::PointSpriteT: - info.loads_point_coord = true; - break; - case IR::Attribute::TessellationEvaluationPointU: - case IR::Attribute::TessellationEvaluationPointV: - info.loads_tess_coord = true; - break; - case IR::Attribute::InstanceId: - info.loads_instance_id = true; - break; - case IR::Attribute::VertexId: - info.loads_vertex_id = true; - break; - case IR::Attribute::FrontFace: - info.loads_front_face = true; - break; - default: - throw NotImplementedException("Get attribute {}", attr); - } -} - -void SetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.stores_generics[IR::GenericAttributeIndex(attr)] = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.stores_fixed_fnc_textures = true; - info.stores_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::Layer: - info.stores_layer = true; - break; - case IR::Attribute::ViewportIndex: - info.stores_viewport_index = true; - break; - case IR::Attribute::PointSize: - info.stores_point_size = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.stores_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.stores_color_front_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorFrontSpecularR: - case IR::Attribute::ColorFrontSpecularG: - case IR::Attribute::ColorFrontSpecularB: - case IR::Attribute::ColorFrontSpecularA: - info.stores_color_front_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackDiffuseR: - case IR::Attribute::ColorBackDiffuseG: - case IR::Attribute::ColorBackDiffuseB: - case IR::Attribute::ColorBackDiffuseA: - info.stores_color_back_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackSpecularR: - case IR::Attribute::ColorBackSpecularG: - case IR::Attribute::ColorBackSpecularB: - case IR::Attribute::ColorBackSpecularA: - info.stores_color_back_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ClipDistance0: - case IR::Attribute::ClipDistance1: - case IR::Attribute::ClipDistance2: - case IR::Attribute::ClipDistance3: - case IR::Attribute::ClipDistance4: - case IR::Attribute::ClipDistance5: - case IR::Attribute::ClipDistance6: - case IR::Attribute::ClipDistance7: - info.stores_clip_distance = true; - break; - case IR::Attribute::FogCoordinate: - info.stores_fog_coordinate = true; - break; - case IR::Attribute::ViewportMask: - info.stores_viewport_mask = true; - break; - default: - throw NotImplementedException("Set attribute {}", attr); - } -} - void GetPatch(Info& info, IR::Patch patch) { if (!IR::IsGeneric(patch)) { throw NotImplementedException("Reading non-generic patch {}", patch); @@ -511,10 +387,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.uses_demote_to_helper_invocation = true; break; case IR::Opcode::GetAttribute: - GetAttribute(info, inst.Arg(0).Attribute()); + info.loads.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::SetAttribute: - SetAttribute(info, inst.Arg(0).Attribute()); + info.stores.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::GetPatch: GetPatch(info, inst.Arg(0).Patch()); @@ -943,26 +819,78 @@ void GatherInfoFromHeader(Environment& env, Info& info) { if (!info.loads_indexed_attributes) { return; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.ps.IsGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const size_t offset{static_cast(IR::Attribute::Generic0X) + index * 4}; + const auto vector{header.ps.imap_generic_vector[index]}; + info.loads.mask[offset + 0] = vector.x != PixelImap::Unused; + info.loads.mask[offset + 1] = vector.y != PixelImap::Unused; + info.loads.mask[offset + 2] = vector.z != PixelImap::Unused; + info.loads.mask[offset + 3] = vector.w != PixelImap::Unused; } - info.loads_position |= header.ps.imap_systemb.position != 0; return; } if (info.loads_indexed_attributes) { - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.vtg.IsInputGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask = header.vtg.InputGeneric(index); + for (size_t i = 0; i < 4; ++i) { + info.loads.Set(attribute + i, mask[i]); + } + } + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.clip_distances}; + info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); } - info.loads_position |= header.vtg.imap_systemb.position != 0; + info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0); + info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0); + info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0); + info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0); + info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0); + info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0); + info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0); + info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0); + info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0); + info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0); + info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.tessellation_eval_point_u != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.tessellation_eval_point_v != 0); + info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0); + info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0); + // TODO: Legacy varyings } if (info.stores_indexed_attributes) { - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (header.vtg.IsOutputGenericVectorActive(i)) { - info.stores_generics[i] = true; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask{header.vtg.OutputGeneric(index)}; + for (size_t i = 0; i < 4; ++i) { + info.stores.Set(attribute + i, mask[i]); } } - info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; - info.stores_position |= header.vtg.omap_systemb.position != 0; + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.omap_systemc.clip_distances}; + info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.stores.Set(IR::Attribute::PrimitiveId, + header.vtg.omap_systemb.primitive_array_id != 0); + info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0); + info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0); + info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0); + info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0); + info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0); + info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0); + info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0); + info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0); + info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0); + info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.omap_systemc.tessellation_eval_point_u != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.omap_systemc.tessellation_eval_point_v != 0); + info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0); + info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0); + // TODO: Legacy varyings } } } // Anonymous namespace diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index d46be1638..ee1887b56 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -34,6 +34,7 @@ struct Profile { bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; bool support_derivative_control{}; + bool support_geometry_shader_passthrough{}; bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h index 6933750aa..bd6c2bfb5 100644 --- a/src/shader_recompiler/program_header.h +++ b/src/shader_recompiler/program_header.h @@ -37,7 +37,9 @@ struct ProgramHeader { BitField<15, 1, u32> kills_pixels; BitField<16, 1, u32> does_global_store; BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; + BitField<21, 2, u32> reserved1; + BitField<24, 1, u32> geometry_passthrough; + BitField<25, 1, u32> reserved2; BitField<26, 1, u32> does_load_or_store; BitField<27, 1, u32> does_fp64; BitField<28, 4, u32> stream_out_mask; @@ -79,24 +81,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } imap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } imap_generic_vector[16]; + std::array imap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // ImapColor union { @@ -122,24 +110,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } omap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } omap_generic_vector[16]; + std::array omap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // OmapColor @@ -157,18 +131,24 @@ struct ProgramHeader { INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - [[nodiscard]] bool IsInputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return imap_generic_vector[index >> 1].first != 0; - } - return imap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array InputGeneric(size_t index) const noexcept { + const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } - [[nodiscard]] bool IsOutputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return omap_generic_vector[index >> 1].first != 0; - } - return omap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array OutputGeneric(size_t index) const noexcept { + const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } } vtg; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 63fe2afaf..f3f83a258 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -10,6 +10,7 @@ #include #include "common/common_types.h" +#include "shader_recompiler/varying_state.h" namespace Shader { @@ -60,7 +61,7 @@ struct TransformFeedbackVarying { struct RuntimeInfo { std::array generic_input_types{}; - std::bitset<32> previous_stage_stores_generic{}; + VaryingState previous_stage_stores; bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index a20e15d2e..4ef4dbd40 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/varying_state.h" #include #include @@ -44,11 +45,6 @@ enum class Interpolation { NoPerspective, }; -struct InputVarying { - Interpolation interpolation{Interpolation::Smooth}; - bool used{false}; -}; - struct ConstantBufferDescriptor { u32 index; u32 count; @@ -121,18 +117,10 @@ struct Info { bool uses_subgroup_shuffles{}; std::array uses_patches{}; - std::array input_generics{}; - bool loads_primitive_id{}; - bool loads_position{}; - bool loads_color_front_diffuse{}; - bool loads_fixed_fnc_textures{}; - bool loads_point_coord{}; - bool loads_instance_id{}; - bool loads_vertex_id{}; - bool loads_front_face{}; - bool loads_legacy_varyings{}; - - bool loads_tess_coord{}; + std::array interpolation{}; + VaryingState loads; + VaryingState stores; + VaryingState passthrough; bool loads_indexed_attributes{}; @@ -140,21 +128,6 @@ struct Info { bool stores_sample_mask{}; bool stores_frag_depth{}; - std::bitset<32> stores_generics{}; - bool stores_layer{}; - bool stores_viewport_index{}; - bool stores_point_size{}; - bool stores_position{}; - bool stores_color_front_diffuse{}; - bool stores_color_front_specular{}; - bool stores_color_back_diffuse{}; - bool stores_color_back_specular{}; - bool stores_fixed_fnc_textures{}; - bool stores_clip_distance{}; - bool stores_fog_coordinate{}; - bool stores_viewport_mask{}; - bool stores_legacy_varyings{}; - bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h new file mode 100644 index 000000000..9d7b24a76 --- /dev/null +++ b/src/shader_recompiler/varying_state.h @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "shader_recompiler/frontend/ir/attribute.h" + +namespace Shader { + +struct VaryingState { + std::bitset<256> mask{}; + + void Set(IR::Attribute attribute, bool state = true) { + mask[static_cast(attribute)] = state; + } + + [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept { + return mask[static_cast(attribute)]; + } + + [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept { + return mask[static_cast(base) + 0] || mask[static_cast(base) + 1] || + mask[static_cast(base) + 2] || mask[static_cast(base) + 3]; + } + + [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept { + return mask[static_cast(base) + 0] && mask[static_cast(base) + 1] && + mask[static_cast(base) + 2] && mask[static_cast(base) + 3]; + } + + [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept { + return AnyComponent(base) == AllComponents(base); + } + + [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept { + return mask[static_cast(IR::Attribute::Generic0X) + index * 4 + component]; + } + + [[nodiscard]] bool Generic(size_t index) const noexcept { + return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3); + } + + [[nodiscard]] bool ClipDistances() const noexcept { + return AnyComponent(IR::Attribute::ClipDistance0) || + AnyComponent(IR::Attribute::ClipDistance4); + } + + [[nodiscard]] bool Legacy() const noexcept { + return AnyComponent(IR::Attribute::ColorFrontDiffuseR) || + AnyComponent(IR::Attribute::ColorFrontSpecularR) || + AnyComponent(IR::Attribute::ColorBackDiffuseR) || + AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture(); + } + + [[nodiscard]] bool FixedFunctionTexture() const noexcept { + for (size_t index = 0; index < 10; ++index) { + if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + return true; + } + } + return false; + } +}; + +} // namespace Shader diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index da2ded671..471d5686a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -961,7 +961,11 @@ public: SamplerIndex sampler_index; - INSERT_PADDING_WORDS_NOINIT(0x25); + INSERT_PADDING_WORDS_NOINIT(0x2); + + std::array gp_passthrough_mask; + + INSERT_PADDING_WORDS_NOINIT(0x1B); u32 depth_test_enable; @@ -1628,6 +1632,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); ASSERT_REG_POSITION(zeta_depth, 0x48c); ASSERT_REG_POSITION(sampler_index, 0x48D); +ASSERT_REG_POSITION(gp_passthrough_mask, 0x490); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5af9b7745..06e39a503 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -61,10 +61,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + // Mark all stores as available for vertex shaders + info.previous_stage_stores.mask.set(); } switch (program.stage) { case Shader::Stage::VertexB: @@ -187,6 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), + .support_geometry_shader_passthrough = false, // TODO .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 70e183e65..6d664ed6b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -487,10 +487,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector vertex_binding_divisors; static_vector vertex_attributes; if (key.state.dynamic_vertex_input) { - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const u32 type = key.state.DynamicAttributeType(index); - if (!input_attributes[index].used || type == 0) { + if (!stage_infos[0].loads.Generic(index) || type == 0) { continue; } vertex_attributes.push_back({ @@ -526,10 +525,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }); } } - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { + if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ec06b124f..7aaa40ef2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -123,18 +123,21 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde return Shader::AttributeType::Disabled; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(std::span programs, + const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program, const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; + if (previous_program->is_geometry_passthrough) { + info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; + } } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + info.previous_stage_stores.mask.set(); } const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; + const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { @@ -302,6 +305,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .support_derivative_control = true, + .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), @@ -518,7 +522,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; + const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index d463e2b56..429cab30d 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 4; +constexpr u32 CACHE_VERSION = 5; constexpr size_t INST_SIZE = sizeof(u64); @@ -155,6 +155,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.write(reinterpret_cast(&gp_passthrough_mask), + sizeof(gp_passthrough_mask)); + } } } @@ -202,6 +206,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -319,6 +324,9 @@ void FileEnvironment::Deserialize(std::ifstream& file) { .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.read(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.read(reinterpret_cast(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); + } } } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7b184d2f8..da4721e6b 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -350,6 +350,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); } + if (!nv_geometry_shader_passthrough) { + LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -768,6 +772,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); + test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, + true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index a9c0a0e4d..d0adc0127 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -194,6 +194,11 @@ public: return nv_viewport_array2; } + /// Returns true if the device supports VK_NV_geometry_shader_passthrough. + bool IsNvGeometryShaderPassthroughSupported() const { + return nv_geometry_shader_passthrough; + } + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -363,6 +368,7 @@ private: bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. + bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. -- cgit v1.2.3 From 8a3427a4c857aa08e365d1776d1f0d9f32639c9c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 17:40:24 -0300 Subject: glasm: Add passthrough geometry shader support --- .../backend/glasm/emit_context.cpp | 5 ++-- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 28 ++++++++++++++++++---- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 ++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 5 files changed, 33 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 80dad9ff3..069c019ad 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -83,13 +83,14 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { - if (info.loads.Generic(index)) { + if (loads.Generic(index)) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); } } - if (IsInputArray(stage) && info.loads.AnyComponent(IR::Attribute::PositionX)) { + if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) { Add("ATTRIB vertex_position=vertex.position;"); } if (info.uses_invocation_id) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 2b96977b3..64787b353 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -304,6 +304,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile, header += "OPTION NV_viewport_array2;"; } } + if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) { + header += "OPTION NV_geometry_shader_passthrough;"; + } if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { header += "OPTION EXT_shader_image_load_formatted;"; } @@ -410,11 +413,26 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I runtime_info.tess_clockwise ? "CW" : "CCW"); break; case Stage::Geometry: - header += fmt::format("PRIMITIVE_IN {};" - "PRIMITIVE_OUT {};" - "VERTICES_OUT {};", - InputPrimitive(runtime_info.input_topology), - OutputPrimitive(program.output_topology), program.output_vertices); + header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology)); + if (program.is_geometry_passthrough) { + if (profile.support_geometry_shader_passthrough) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (program.info.passthrough.Generic(index)) { + header += fmt::format("PASSTHROUGH result.attrib[{}];", index); + } + } + if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + header += "PASSTHROUGH result.position;"; + } + } else { + LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported"); + } + } else { + header += + fmt::format("VERTICES_OUT {};" + "PRIMITIVE_OUT {};", + program.output_vertices, OutputPrimitive(program.output_topology)); + } break; case Stage::Compute: header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 27be347e6..6818951f2 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); + has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ad7b01b06..45ddf5e01 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -120,6 +120,10 @@ public: return has_depth_buffer_float; } + bool HasGeometryShaderPassthrough() const { + return has_geometry_shader_passthrough; + } + bool HasNvGpuShader5() const { return has_nv_gpu_shader_5; } @@ -174,6 +178,7 @@ private: bool use_asynchronous_shaders{}; bool use_driver_cache{}; bool has_depth_buffer_float{}; + bool has_geometry_shader_passthrough{}; bool has_nv_gpu_shader_5{}; bool has_shader_int64{}; bool has_amd_shader_half_float{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 06e39a503..af8e9f44d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -187,7 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), - .support_geometry_shader_passthrough = false, // TODO + .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), -- cgit v1.2.3 From 1152d66ddd4e7b29b53e01990fef77e4cff20e24 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:28:48 -0400 Subject: general: Add setting shader_backend GLASM is getting good enough that we can move it out of advanced graphics settings. This removes the setting `use_assembly_shaders`, opting for a enum class `shader_backend`. This comes with the benefits that it is extensible for additional shader backends besides GLSL and GLASM, and this will work better with a QComboBox. Qt removes the related assembly shader setting from the Advanced Graphics section and places it as a new QComboBox in the API Settings group. This will replace the Vulkan device selector when OpenGL is selected. Additionally, mark all of the custom anisotropic filtering settings as "WILL BREAK THINGS", as that is the case with a select few games. --- src/common/settings.cpp | 4 +- src/common/settings.h | 8 +- src/core/telemetry_session.cpp | 4 +- src/video_core/renderer_opengl/gl_device.cpp | 10 +- src/yuzu/configuration/config.cpp | 7 +- src/yuzu/configuration/config.h | 3 +- src/yuzu/configuration/configure_graphics.cpp | 76 ++++++++----- src/yuzu/configuration/configure_graphics.h | 4 +- src/yuzu/configuration/configure_graphics.ui | 118 ++++++++++++++++----- .../configuration/configure_graphics_advanced.cpp | 7 -- .../configuration/configure_graphics_advanced.h | 1 - .../configuration/configure_graphics_advanced.ui | 18 +--- src/yuzu_cmd/config.cpp | 2 +- src/yuzu_cmd/default_ini.h | 7 +- 14 files changed, 182 insertions(+), 87 deletions(-) (limited to 'src/video_core') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index bf5514386..66268ea0f 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -57,7 +57,7 @@ void LogSettings() { log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); - log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); + log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); @@ -140,7 +140,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_nvdec_emulation.SetGlobal(true); values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); - values.use_assembly_shaders.SetGlobal(true); + values.shader_backend.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); values.use_caches_gc.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index ac0590690..32dfb1d9f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -24,6 +24,12 @@ enum class RendererBackend : u32 { Vulkan = 1, }; +enum class ShaderBackend : u32 { + GLSL = 0, + GLASM = 1, + SPIRV = 2, +}; + enum class GPUAccuracy : u32 { Normal = 0, High = 1, @@ -334,7 +340,7 @@ struct Values { Setting accelerate_astc{true, "accelerate_astc"}; Setting use_vsync{true, "use_vsync"}; BasicSetting disable_fps_limit{false, "disable_fps_limit"}; - Setting use_assembly_shaders{false, "use_assembly_shaders"}; + Setting shader_backend{ShaderBackend::GLASM, "shader_backend"}; Setting use_asynchronous_shaders{false, "use_asynchronous_shaders"}; Setting use_fast_gpu_time{true, "use_fast_gpu_time"}; Setting use_caches_gc{false, "use_caches_gc"}; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 066cb23e4..422de3a7d 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -233,8 +233,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, Settings::values.use_nvdec_emulation.GetValue()); AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); - AddField(field_type, "Renderer_UseAssemblyShaders", - Settings::values.use_assembly_shaders.GetValue()); + AddField(field_type, "Renderer_ShaderBackend", + static_cast(Settings::values.shader_backend.GetValue())); AddField(field_type, "Renderer_UseAsynchronousShaders", Settings::values.use_asynchronous_shaders.GetValue()); AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode.GetValue()); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 6818951f2..c4eeed53b 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -172,9 +172,10 @@ Device::Device() { // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && - GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && - GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + use_assembly_shaders = + Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && + GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && + GLAD_GL_NV_transform_feedback2; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && @@ -187,7 +188,8 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { + if (Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && + !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index dc69574a9..52b3ed02e 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -814,7 +814,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_nvdec_emulation); ReadGlobalSetting(Settings::values.accelerate_astc); ReadGlobalSetting(Settings::values.use_vsync); - ReadGlobalSetting(Settings::values.use_assembly_shaders); + ReadGlobalSetting(Settings::values.shader_backend); ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); ReadGlobalSetting(Settings::values.use_caches_gc); @@ -1345,7 +1345,10 @@ void Config::SaveRendererValues() { WriteGlobalSetting(Settings::values.use_nvdec_emulation); WriteGlobalSetting(Settings::values.accelerate_astc); WriteGlobalSetting(Settings::values.use_vsync); - WriteGlobalSetting(Settings::values.use_assembly_shaders); + WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), + static_cast(Settings::values.shader_backend.GetValue(global)), + static_cast(Settings::values.shader_backend.GetDefault()), + Settings::values.shader_backend.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); WriteGlobalSetting(Settings::values.use_caches_gc); diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 96f9b6de1..4bbb9f1cd 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -180,5 +180,6 @@ private: // These metatype declarations cannot be in common/settings.h because core is devoid of QT Q_DECLARE_METATYPE(Settings::CPUAccuracy); -Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::GPUAccuracy); +Q_DECLARE_METATYPE(Settings::RendererBackend); +Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 4d5b4c0e6..463448dbf 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -26,19 +26,25 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ui->setupUi(this); + ui->backend->addItem(QStringLiteral("GLSL")); + ui->backend->addItem(tr("GLASM (NVIDIA Only)")); + ui->backend->addItem(QStringLiteral("SPIR-V")); + SetupPerGameUI(); SetConfiguration(); connect(ui->api, qOverload(&QComboBox::currentIndexChanged), this, [this] { - UpdateDeviceComboBox(); + UpdateAPILayout(); if (!Settings::IsConfiguringGlobal()) { ConfigurationShared::SetHighlight( - ui->api_layout, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); + ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); } }); connect(ui->device, qOverload(&QComboBox::activated), this, [this](int device) { UpdateDeviceSelection(device); }); + connect(ui->backend, qOverload(&QComboBox::activated), this, + [this](int backend) { UpdateShaderBackendSelection(backend); }); connect(ui->bg_button, &QPushButton::clicked, this, [this] { const QColor new_bg_color = QColorDialog::getColor(bg_color); @@ -48,6 +54,10 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) UpdateBackgroundColorButton(new_bg_color); }); + for (const auto& device : vulkan_devices) { + ui->device->addItem(device); + } + ui->bg_label->setVisible(Settings::IsConfiguringGlobal()); ui->bg_combobox->setVisible(!Settings::IsConfiguringGlobal()); } @@ -61,12 +71,21 @@ void ConfigureGraphics::UpdateDeviceSelection(int device) { } } +void ConfigureGraphics::UpdateShaderBackendSelection(int backend) { + if (backend == -1) { + return; + } + if (GetCurrentGraphicsBackend() == Settings::RendererBackend::OpenGL) { + shader_backend = static_cast(backend); + } +} + ConfigureGraphics::~ConfigureGraphics() = default; void ConfigureGraphics::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); - ui->api->setEnabled(runtime_lock); + ui->api_widget->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_nvdec_emulation->setEnabled(runtime_lock); @@ -83,7 +102,7 @@ void ConfigureGraphics::SetConfiguration() { ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); - ConfigurationShared::SetHighlight(ui->api_layout, + ConfigurationShared::SetHighlight(ui->api_widget, !Settings::values.renderer_backend.UsingGlobal()); ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, @@ -100,11 +119,10 @@ void ConfigureGraphics::SetConfiguration() { ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); } - UpdateBackgroundColorButton(QColor::fromRgb(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(), Settings::values.bg_blue.GetValue())); - UpdateDeviceComboBox(); + UpdateAPILayout(); } void ConfigureGraphics::ApplyConfiguration() { @@ -128,6 +146,9 @@ void ConfigureGraphics::ApplyConfiguration() { if (Settings::values.renderer_backend.UsingGlobal()) { Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); } + if (Settings::values.shader_backend.UsingGlobal()) { + Settings::values.shader_backend.SetValue(shader_backend); + } if (Settings::values.vulkan_device.UsingGlobal()) { Settings::values.vulkan_device.SetValue(vulkan_device); } @@ -139,15 +160,22 @@ void ConfigureGraphics::ApplyConfiguration() { } else { if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.renderer_backend.SetGlobal(true); + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(true); } else { Settings::values.renderer_backend.SetGlobal(false); Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); - if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) { + switch (GetCurrentGraphicsBackend()) { + case Settings::RendererBackend::OpenGL: + Settings::values.shader_backend.SetGlobal(false); + Settings::values.vulkan_device.SetGlobal(true); + Settings::values.shader_backend.SetValue(shader_backend); + break; + case Settings::RendererBackend::Vulkan: + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(false); Settings::values.vulkan_device.SetValue(vulkan_device); - } else { - Settings::values.vulkan_device.SetGlobal(true); + break; } } @@ -188,32 +216,32 @@ void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) { ui->bg_button->setIcon(color_icon); } -void ConfigureGraphics::UpdateDeviceComboBox() { - ui->device->clear(); - - bool enabled = false; - +void ConfigureGraphics::UpdateAPILayout() { if (!Settings::IsConfiguringGlobal() && ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + vulkan_device = Settings::values.vulkan_device.GetValue(true); + shader_backend = Settings::values.shader_backend.GetValue(true); + ui->device_widget->setEnabled(false); + ui->backend_widget->setEnabled(false); + } else { vulkan_device = Settings::values.vulkan_device.GetValue(); + shader_backend = Settings::values.shader_backend.GetValue(); + ui->device_widget->setEnabled(true); + ui->backend_widget->setEnabled(true); } + switch (GetCurrentGraphicsBackend()) { case Settings::RendererBackend::OpenGL: - ui->device->addItem(tr("OpenGL Graphics Device")); - enabled = false; + ui->backend->setCurrentIndex(static_cast(shader_backend)); + ui->device_widget->setVisible(false); + ui->backend_widget->setVisible(true); break; case Settings::RendererBackend::Vulkan: - for (const auto& device : vulkan_devices) { - ui->device->addItem(device); - } ui->device->setCurrentIndex(vulkan_device); - enabled = !vulkan_devices.empty(); + ui->device_widget->setVisible(true); + ui->backend_widget->setVisible(false); break; } - // If in per-game config and use global is selected, don't enable. - enabled &= !(!Settings::IsConfiguringGlobal() && - ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX); - ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); } void ConfigureGraphics::RetrieveVulkanDevices() try { diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index 6418115cf..c866b911b 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -34,8 +34,9 @@ private: void SetConfiguration(); void UpdateBackgroundColorButton(QColor color); - void UpdateDeviceComboBox(); + void UpdateAPILayout(); void UpdateDeviceSelection(int device); + void UpdateShaderBackendSelection(int backend); void RetrieveVulkanDevices(); @@ -53,4 +54,5 @@ private: std::vector vulkan_devices; u32 vulkan_device{}; + Settings::ShaderBackend shader_backend{}; }; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 5b999d84d..099ddbb7c 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -23,7 +23,7 @@ - + 0 @@ -40,37 +40,107 @@ 6 - - - - API: - + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Shader Backend: + + + + + + + - - - - - OpenGL + + + + + 0 - - - - Vulkan + + 0 - + + 0 + + + 0 + + + + + Device: + + + + + + + - - - - Device: - + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + API: + + + + + + + + 0 + 0 + + + + + OpenGL + + + + + Vulkan + + + + + - - - diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a9e611125..38276feb1 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -23,12 +23,10 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; void ConfigureGraphicsAdvanced::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); ui->use_vsync->setEnabled(runtime_lock); - ui->use_assembly_shaders->setEnabled(runtime_lock); ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); - ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); @@ -58,8 +56,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, ui->anisotropic_filtering_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders, - ui->use_assembly_shaders, use_assembly_shaders); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, ui->use_asynchronous_shaders, use_asynchronous_shaders); @@ -100,7 +96,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); - ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal()); ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); @@ -112,8 +107,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { } ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); - ConfigurationShared::SetColoredTristate( - ui->use_assembly_shaders, Settings::values.use_assembly_shaders, use_assembly_shaders); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, Settings::values.use_asynchronous_shaders, use_asynchronous_shaders); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 9148aacf2..7356e6916 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -35,7 +35,6 @@ private: std::unique_ptr ui; ConfigurationShared::CheckState use_vsync; - ConfigurationShared::CheckState use_assembly_shaders; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; ConfigurationShared::CheckState use_caches_gc; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index ad0840355..772e5fed3 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -76,16 +76,6 @@ - - - - Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental. - - - Use assembly shaders (experimental, Nvidia OpenGL only) - - - @@ -144,22 +134,22 @@ - 2x + 2x (WILL BREAK THINGS) - 4x + 4x (WILL BREAK THINGS) - 8x + 8x (WILL BREAK THINGS) - 16x + 16x (WILL BREAK THINGS) diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 763df6dd6..640d7d111 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -458,7 +458,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); ReadSetting("Renderer", Settings::values.use_vsync); ReadSetting("Renderer", Settings::values.disable_fps_limit); - ReadSetting("Renderer", Settings::values.use_assembly_shaders); + ReadSetting("Renderer", Settings::values.shader_backend); ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); ReadSetting("Renderer", Settings::values.use_nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index a6ca7b6cd..b7115b06a 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -248,9 +248,10 @@ max_anisotropy = # 0 (default): Off, 1: On use_vsync = -# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. -# 0: Off, 1 (default): On -use_assembly_shaders = +# Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is +# not available and GLASM is selected, GLSL will be used. +# 0: GLSL, 1 (default): GLASM, 2: SPIR-V +shader_backend = # Whether to allow asynchronous shader building. # 0 (default): Off, 1: On -- cgit v1.2.3 From fb9b1787f86d069db27fe0af44ded042c6d8de39 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Tue, 22 Jun 2021 01:12:11 -0400 Subject: video_core: Enable GL SPIR-V shaders --- .../renderer_opengl/gl_compute_pipeline.cpp | 17 ++++-- .../renderer_opengl/gl_compute_pipeline.h | 2 +- src/video_core/renderer_opengl/gl_device.cpp | 8 ++- src/video_core/renderer_opengl/gl_device.h | 11 ++++ .../renderer_opengl/gl_graphics_pipeline.cpp | 64 ++++++++++++++-------- .../renderer_opengl/gl_graphics_pipeline.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 40 +++++++++++--- 7 files changed, 105 insertions(+), 38 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 2d6442d74..c63e87a56 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -5,6 +5,7 @@ #include #include "common/cityhash.h" +#include "common/settings.h" // for enum class Settings::ShaderBackend #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -40,15 +41,23 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - std::string code) + std::string code, std::vector code_v) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { - if (device.UseAssemblyShaders()) { - assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); - } else { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: source_program.handle = glCreateProgram(); AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); + break; + case Settings::ShaderBackend::GLASM: + assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + break; + case Settings::ShaderBackend::SPIRV: + source_program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code_v); + LinkProgram(source_program.handle); + break; } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index b5fc45f26..50c676365 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - std::string code); + std::string code, std::vector code_v); void Configure(); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c4eeed53b..99f8769fc 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -177,6 +177,11 @@ Device::Device() { GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + shader_backend = (Settings::values.shader_backend.GetValue() == + Settings::ShaderBackend::GLASM) == use_assembly_shaders + ? Settings::values.shader_backend.GetValue() + : Settings::ShaderBackend::GLSL; + // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && !(is_amd || (is_intel && !is_linux)); @@ -188,8 +193,7 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - if (Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && - !use_assembly_shaders) { + if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 45ddf5e01..ee992aed4 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -8,6 +8,10 @@ #include "common/common_types.h" #include "shader_recompiler/stage.h" +namespace Settings { +enum class ShaderBackend : u32; +}; + namespace OpenGL { class Device { @@ -148,6 +152,10 @@ public: return need_fastmath_off; } + Settings::ShaderBackend GetShaderBackend() const { + return shader_backend; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -159,6 +167,9 @@ private: u32 max_varyings{}; u32 max_compute_shared_memory_size{}; u32 max_glasm_storage_buffer_blocks{}; + + Settings::ShaderBackend shader_backend{}; + bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a93b03cf7..1f19b5825 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -3,7 +3,11 @@ // Refer to the license.txt file included. #include +#include +#include +#include +#include "common/settings.h" // for enum class Settings::ShaderBackend #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" @@ -179,7 +183,8 @@ GraphicsPipeline::GraphicsPipeline( Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, - const std::array& infos, const GraphicsPipelineKey& key_) + std::array, 5> sources_spirv, const std::array& infos, + const GraphicsPipelineKey& key_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_}, key{key_} { @@ -232,29 +237,44 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { - if (!device.UseAssemblyShaders()) { - program.handle = glCreateProgram(); - } - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{sources[stage]}; - if (code.empty()) { - continue; + auto func{ + [this, device, sources, sources_spirv, shader_notify](ShaderContext::Context*) mutable { + if (!device.UseAssemblyShaders()) { + program.handle = glCreateProgram(); } - if (device.UseAssemblyShaders()) { - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - } else { - AttachShader(Stage(stage), program.handle, code); + for (size_t stage = 0; stage < 5; ++stage) { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } break; + case Settings::ShaderBackend::GLASM: { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + } break; + case Settings::ShaderBackend::SPIRV: { + const auto code{sources_spirv[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } break; + } } - } - if (!device.UseAssemblyShaders()) { - LinkProgram(program.handle); - } - if (shader_notify) { - shader_notify->MarkShaderComplete(); - } - is_built.store(true, std::memory_order_relaxed); - }}; + if (!device.UseAssemblyShaders()) { + LinkProgram(program.handle); + } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + is_built.store(true, std::memory_order_relaxed); + }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); } else { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index f82d712f8..5f5d57385 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -78,6 +78,7 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, + std::array, 5> sources_spirv, const std::array& infos, const GraphicsPipelineKey& key_); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index af8e9f44d..cde0f54c9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -15,6 +15,7 @@ #include "common/fs/path_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/settings.h" #include "common/thread_worker.h" #include "core/core.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" @@ -415,6 +416,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array sources; + std::array, 5> sources_spirv; Shader::Backend::Bindings binding; Shader::IR::Program* previous_program{}; const bool use_glasm{device.UseAssemblyShaders()}; @@ -431,17 +433,23 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; - if (use_glasm) { - sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); - } else { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::GLASM: + sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::SPIRV: + sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding); + break; } previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; - return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - maxwell3d, program_manager, state_tracker, - thread_worker, &shader_notify, sources, infos, key); + return std::make_unique( + device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + thread_worker, &shader_notify, sources, sources_spirv, infos, key); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -478,10 +486,24 @@ std::unique_ptr ShaderCache::CreateComputePipeline( } Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - const std::string code{device.UseAssemblyShaders() ? EmitGLASM(profile, info, program) - : EmitGLSL(profile, program)}; + + std::string code{}; + std::vector code_spirv; + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + code = EmitGLSL(profile, program); + break; + case Settings::ShaderBackend::GLASM: + code = EmitGLASM(profile, info, program); + break; + case Settings::ShaderBackend::SPIRV: + code_spirv = EmitSPIRV(profile, program); + break; + } + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - kepler_compute, program_manager, program.info, code); + kepler_compute, program_manager, program.info, code, + code_spirv); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); return nullptr; -- cgit v1.2.3 From 57a8921e01a90ff5993079dd638a6c48e5781756 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Jun 2021 05:21:51 -0300 Subject: vk_graphics_pipeline: Implement line width --- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 3 ++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 9 +++++++++ src/video_core/renderer_vulkan/vk_rasterizer.h | 1 + src/video_core/renderer_vulkan/vk_state_tracker.cpp | 14 ++++++++++---- src/video_core/renderer_vulkan/vk_state_tracker.h | 7 ++++++- src/video_core/vulkan_common/vulkan_device.cpp | 4 ++-- src/video_core/vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 5 +++++ 8 files changed, 36 insertions(+), 8 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6d664ed6b..3363a6877 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -705,11 +705,12 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .pAttachments = cb_attachments.data(), .blendConstants = {}, }; - static_vector dynamic_states{ + static_vector dynamic_states{ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_LINE_WIDTH, }; if (key.state.extended_dynamic_state) { static constexpr std::array extended{ diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f04c3394c..bb7301c53 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -541,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateBlendConstants(regs); UpdateDepthBounds(regs); UpdateStencilFaces(regs); + UpdateLineWidth(regs); if (device.IsExtExtendedDynamicStateSupported()) { UpdateCullMode(regs); UpdateDepthBoundsTestEnable(regs); @@ -676,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) } } +void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchLineWidth()) { + return; + } + const float width = regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased; + scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); }); +} + void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchCullMode()) { return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c954fa7f8..866827247 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -125,6 +125,7 @@ private: void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 0ebe0473f..e3b7dd61c 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -29,10 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; Flags MakeInvalidationFlags() { static constexpr int INVALIDATION_FLAGS[]{ - Viewports, Scissors, DepthBias, BlendConstants, - DepthBounds, StencilProperties, CullMode, DepthBoundsEnable, - DepthTestEnable, DepthWriteEnable, DepthCompareOp, FrontFace, - StencilOp, StencilTestEnable, VertexBuffers, VertexInput, + Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, + StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable, + DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, + VertexBuffers, VertexInput, }; Flags flags{}; for (const int flag : INVALIDATION_FLAGS) { @@ -86,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) { table[OFF(stencil_back_func_mask)] = StencilProperties; } +void SetupDirtyLineWidth(Tables& tables) { + tables[0][OFF(line_width_smooth)] = LineWidth; + tables[0][OFF(line_width_aliased)] = LineWidth; +} + void SetupDirtyCullMode(Tables& tables) { auto& table = tables[0]; table[OFF(cull_face)] = CullMode; @@ -180,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyBlendConstants(tables); SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); + SetupDirtyLineWidth(tables); SetupDirtyCullMode(tables); SetupDirtyDepthBoundsEnable(tables); SetupDirtyDepthTestEnable(tables); diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 1976b7e9b..5f78f6950 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -31,6 +31,7 @@ enum : u8 { BlendConstants, DepthBounds, StencilProperties, + LineWidth, CullMode, DepthBoundsEnable, @@ -44,7 +45,7 @@ enum : u8 { Blending, ViewportSwizzles, - Last + Last, }; static_assert(Last <= std::numeric_limits::max()); @@ -93,6 +94,10 @@ public: return Exchange(Dirty::StencilProperties, false); } + bool TouchLineWidth() const { + return Exchange(Dirty::LineWidth, false); + } + bool TouchCullMode() { return Exchange(Dirty::CullMode, false); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index da4721e6b..912e03c5c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -227,7 +227,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .depthBiasClamp = true, .fillModeNonSolid = true, .depthBounds = is_depth_bounds_supported, - .wideLines = false, + .wideLines = true, .largePoints = true, .alphaToOne = false, .multiViewport = true, @@ -703,7 +703,6 @@ void Device::CheckSuitability(bool requires_swapchain) const { const std::array feature_report{ std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), - std::make_pair(features.robustBufferAccess, "robustBufferAccess"), std::make_pair(features.imageCubeArray, "imageCubeArray"), std::make_pair(features.independentBlend, "independentBlend"), std::make_pair(features.depthClamp, "depthClamp"), @@ -712,6 +711,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { std::make_pair(features.multiViewport, "multiViewport"), std::make_pair(features.depthBiasClamp, "depthBiasClamp"), std::make_pair(features.fillModeNonSolid, "fillModeNonSolid"), + std::make_pair(features.wideLines, "wideLines"), std::make_pair(features.geometryShader, "geometryShader"), std::make_pair(features.tessellationShader, "tessellationShader"), std::make_pair(features.sampleRateShading, "sampleRateShading"), diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index d7e9fac22..bbf0fccae 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -121,6 +121,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdSetDepthTestEnableEXT); X(vkCmdSetDepthWriteEnableEXT); X(vkCmdSetFrontFaceEXT); + X(vkCmdSetLineWidth); X(vkCmdSetPrimitiveTopologyEXT); X(vkCmdSetStencilOpEXT); X(vkCmdSetStencilTestEnableEXT); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index d43b606f1..d76bb4324 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -231,6 +231,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; PFN_vkCmdSetEvent vkCmdSetEvent{}; PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; + PFN_vkCmdSetLineWidth vkCmdSetLineWidth{}; PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; PFN_vkCmdSetScissor vkCmdSetScissor{}; PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; @@ -1198,6 +1199,10 @@ public: dld->vkCmdSetFrontFaceEXT(handle, front_face); } + void SetLineWidth(float line_width) const noexcept { + dld->vkCmdSetLineWidth(handle, line_width); + } + void SetPrimitiveTopologyEXT(VkPrimitiveTopology primitive_topology) const noexcept { dld->vkCmdSetPrimitiveTopologyEXT(handle, primitive_topology); } -- cgit v1.2.3 From f94f0be5215369a6985247ad936d9d9f43c9b140 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 25 Jun 2021 05:25:19 -0300 Subject: vk_graphics_pipeline: Implement smooth lines --- .../renderer_vulkan/fixed_pipeline_state.cpp | 1 + .../renderer_vulkan/fixed_pipeline_state.h | 1 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 21 +++++++++++ src/video_core/vulkan_common/vulkan_device.cpp | 41 +++++++++++++++++++--- src/video_core/vulkan_common/vulkan_device.h | 6 ++++ 5 files changed, 65 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 7563dc462..d089da8a4 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -88,6 +88,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); + smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0); for (size_t i = 0; i < regs.rt.size(); ++i) { color_formats[i] = static_cast(regs.rt[i].format); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 66b57b636..c9be37935 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -195,6 +195,7 @@ struct FixedPipelineState { BitField<11, 1, u32> y_negate; BitField<12, 1, u32> provoking_vertex_last; BitField<13, 1, u32> conservative_raster_enable; + BitField<14, 1, u32> smooth_lines; }; std::array color_formats; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3363a6877..f0ae0b0d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -80,6 +80,14 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); } +bool IsLine(VkPrimitiveTopology topology) { + static constexpr std::array line_topologies{ + VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, + // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT, + }; + return std::ranges::find(line_topologies, topology) == line_topologies.end(); +} + VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { union Swizzle { u32 raw; @@ -616,6 +624,16 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .depthBiasSlopeFactor = 0.0f, .lineWidth = 1.0f, }; + VkPipelineRasterizationLineStateCreateInfoEXT line_state{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT, + .pNext = nullptr, + .lineRasterizationMode = key.state.smooth_lines != 0 + ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT + : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT, + .stippledLineEnable = VK_FALSE, // TODO + .lineStippleFactor = 0, + .lineStipplePattern = 0, + }; VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, .pNext = nullptr, @@ -632,6 +650,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, }; + if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) { + line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state); + } if (device.IsExtConservativeRasterizationSupported()) { conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 912e03c5c..4fa1470e8 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -416,6 +416,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); } + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + if (ext_line_rasterization) { + line_raster = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT, + .pNext = nullptr, + .rectangularLines = VK_TRUE, + .bresenhamLines = VK_FALSE, + .smoothLines = VK_TRUE, + .stippledRectangularLines = VK_FALSE, + .stippledBresenhamLines = VK_FALSE, + .stippledSmoothLines = VK_FALSE, + }; + SetNext(next, line_raster); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support smooth lines"); + } + if (!ext_conservative_rasterization) { LOG_INFO(Render_Vulkan, "Device doesn't support conservative rasterization"); } @@ -757,6 +774,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_shader_atomic_int64{}; bool has_ext_provoking_vertex{}; bool has_ext_vertex_input_dynamic_state{}; + bool has_ext_line_rasterization{}; for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { const auto test = [&](std::optional> status, const char* name, bool push) { @@ -798,6 +816,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); + test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, true); @@ -918,17 +937,29 @@ std::vector Device::LoadExtensions(bool requires_surface) { } } if (has_ext_extended_dynamic_state) { - VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; - dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; - dynamic_state.pNext = nullptr; - features.pNext = &dynamic_state; + VkPhysicalDeviceExtendedDynamicStateFeaturesEXT extended_dynamic_state; + extended_dynamic_state.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; + extended_dynamic_state.pNext = nullptr; + features.pNext = &extended_dynamic_state; physical.GetFeatures2KHR(features); - if (dynamic_state.extendedDynamicState) { + if (extended_dynamic_state.extendedDynamicState) { extensions.push_back(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); ext_extended_dynamic_state = true; } } + if (has_ext_line_rasterization) { + VkPhysicalDeviceLineRasterizationFeaturesEXT line_raster; + line_raster.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT; + line_raster.pNext = nullptr; + features.pNext = &line_raster; + physical.GetFeatures2KHR(features); + if (line_raster.rectangularLines && line_raster.smoothLines) { + extensions.push_back(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME); + ext_line_rasterization = true; + } + } if (has_khr_workgroup_memory_explicit_layout) { VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout; layout.sType = diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index d0adc0127..26100166f 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -259,6 +259,11 @@ public: return ext_extended_dynamic_state; } + /// Returns true if the device supports VK_EXT_line_rasterization. + bool IsExtLineRasterizationSupported() const { + return ext_line_rasterization; + } + /// Returns true if the device supports VK_EXT_vertex_input_dynamic_state. bool IsExtVertexInputDynamicStateSupported() const { return ext_vertex_input_dynamic_state; @@ -382,6 +387,7 @@ private: bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. + bool ext_line_rasterization{}; ///< Support for VK_EXT_line_rasterization. bool ext_vertex_input_dynamic_state{}; ///< Support for VK_EXT_vertex_input_dynamic_state. bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. -- cgit v1.2.3 From 5643a909bc3fa9f497d2f2e68650f823ed2944ac Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Jun 2021 01:14:06 -0300 Subject: shader: Fix disabled and unwritten attributes and varyings --- .../backend/glsl/emit_glsl_context_get_set.cpp | 8 +++-- .../backend/spirv/emit_spirv_context_get_set.cpp | 6 +++- src/video_core/renderer_opengl/renderer_opengl.cpp | 35 ++++++++++++---------- 3 files changed, 31 insertions(+), 18 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 16e2a8502..d5424301b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -179,8 +179,12 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - if (!ctx.runtime_info.previous_stage_stores.Generic(index)) { - ctx.AddF32("{}=0.f;", inst, attr); + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + if (element == 3) { + ctx.AddF32("{}=1.f;", inst, attr); + } else { + ctx.AddF32("{}=0.f;", inst, attr); + } return; } ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 77fbb2b2f..756de0a27 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -298,10 +298,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; - if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index)) { + if (!type) { // Attribute is disabled return ctx.Const(0.0f); } + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + // Varying component is not written + return ctx.Const(type && element == 3 ? 1.0f : 0.0f); + } const Id generic_id{ctx.input_generics.at(index)}; const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; const Id value{ctx.OpLoad(type->id, pointer)}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b8777643b..dab0afe6d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -140,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, } AddTelemetryFields(); InitOpenGLObjects(); + + // Initialize default attributes to match hardware's disabled attributes + GLint max_attribs{}; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs); + for (GLint attrib = 0; attrib < max_attribs; ++attrib) { + glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 0.0f); + } + // Enable seamless cubemaps when per texture parameters are not available + if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { + glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); + } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } RendererOpenGL::~RendererOpenGL() = default; @@ -256,21 +276,6 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); - - // Enable seamless cubemaps when per texture parameters are not available - if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { - glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); - } - - // Enable unified vertex attributes and query vertex buffer address when the driver supports it - if (device.HasVertexBufferUnifiedMemory()) { - glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); - glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); - - glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); - glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, - &vertex_buffer_address); - } } void RendererOpenGL::AddTelemetryFields() { -- cgit v1.2.3 From fba6bd92d456b4d472ed37e663006fafeef154a9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Jun 2021 17:46:01 -0300 Subject: vk_rasterizer: Workaround bug in VK_EXT_vertex_input_dynamic_state Workaround potential bug on Nvidia's driver where only updating high attributes leaves low attributes out dated. --- src/video_core/engines/maxwell_3d.h | 4 --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 31 +++++++++++++--------- 4 files changed, 20 insertions(+), 19 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 471d5686a..1aa43523a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -305,10 +305,6 @@ public: return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); } - bool IsConstant() const { - return constant; - } - bool IsValid() const { return size != Size::Invalid; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0f0d780b5..41d2b73f4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -97,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() { const auto gl_index = static_cast(index); // Disable constant attributes. - if (attrib.IsConstant()) { + if (attrib.constant) { glDisableVertexAttribArray(gl_index); continue; } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d089da8a4..d70153df3 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -128,7 +128,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const auto& input = regs.vertex_attrib_format[index]; auto& attribute = attributes[index]; attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.enabled.Assign(input.constant ? 0 : 1); attribute.buffer.Assign(input.buffer); attribute.offset.Assign(input.offset); attribute.type.Assign(static_cast(input.type.Value())); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bb7301c53..99576b826 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -801,25 +801,30 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) boost::container::static_vector bindings; boost::container::static_vector attributes; + // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up + // generating dirty state. Track the highest dirty attribute and update all attributes until + // that one. + size_t highest_dirty_attr{}; for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - if (!dirty[Dirty::VertexAttribute0 + index]) { - continue; + if (dirty[Dirty::VertexAttribute0 + index]) { + highest_dirty_attr = index; } + } + for (size_t index = 0; index < highest_dirty_attr; ++index) { const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; const u32 binding{attribute.buffer}; dirty[Dirty::VertexAttribute0 + index] = false; dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; - - attributes.push_back({ - .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, - .pNext = nullptr, - .location = static_cast(index), - .binding = binding, - .format = attribute.IsConstant() - ? VK_FORMAT_A8B8G8R8_UNORM_PACK32 - : MaxwellToVK::VertexFormat(attribute.type, attribute.size), - .offset = attribute.offset, - }); + if (!attribute.constant) { + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast(index), + .binding = binding, + .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } } for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { if (!dirty[Dirty::VertexBinding0 + index]) { -- cgit v1.2.3 From 8722668b3c027f0132d0be07e867247debd08d30 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 29 Jun 2021 18:42:17 -0300 Subject: emit_spirv: Workaround VK_KHR_shader_float_controls on fp16 Nvidia Fix regression on Fire Emblem: Three Houses when using native fp16. --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 13 ++++++++----- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 4 files changed, 12 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index ddb86d070..d7a86e270 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -319,7 +319,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit Id main_func) { const Info& info{program.info}; if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { - LOG_WARNING(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); } else if (info.uses_fp32_denorms_flush) { if (profile.support_fp32_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); @@ -332,15 +332,15 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); } else { - LOG_WARNING(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); } } - if (!profile.support_separate_denorm_behavior) { + if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) { // No separate denorm behavior return; } if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { - LOG_WARNING(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); } else if (info.uses_fp16_denorms_flush) { if (profile.support_fp16_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); @@ -353,13 +353,16 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); } else { - LOG_WARNING(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); } } } void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) { + if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) { + return; + } if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index ee1887b56..6ff12387b 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -58,6 +58,8 @@ struct Profile { bool has_broken_unsigned_image_offsets{}; /// Signed instructions with unsigned data types are misinterpreted bool has_broken_signed_operations{}; + /// Float controls break when fp16 is enabled + bool has_broken_fp16_float_controls{}; /// Dynamic vec4 indexing is broken on some OpenGL drivers bool has_gl_component_indexing_bug{}; /// The precise type qualifier is broken in the fragment stage of some drivers diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cde0f54c9..2ea9c9f07 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -206,6 +206,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, + .has_broken_fp16_float_controls = false, .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7aaa40ef2..87b843e3d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,6 +315,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, .has_broken_signed_operations = false, + .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, .ignore_nan_fp_comparisons = false, }; host_info = Shader::HostTranslateInfo{ -- cgit v1.2.3 From 57171b23f9da0d9fa4b07bb77ba5c8ed0083a792 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 29 Jun 2021 18:58:32 -0300 Subject: vulkan_device: Enable VK_EXT_extended_dynamic_state on RADV 21.2 onward --- src/video_core/vulkan_common/vulkan_device.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 4fa1470e8..e297a3e92 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -511,10 +511,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectToolingInfo(); if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { - LOG_WARNING( - Render_Vulkan, - "Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu"); - ext_extended_dynamic_state = false; + // Mask driver version variant + const u32 version = (properties.driverVersion << 3) >> 3; + if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { + LOG_WARNING(Render_Vulkan, + "RADV versions older than 21.2 have broken VK_EXT_extended_dynamic_state"); + ext_extended_dynamic_state = false; + } } if (ext_vertex_input_dynamic_state && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); -- cgit v1.2.3 From dbee32d302a5944bc8e99b55d956013503b66c6c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 1 Jul 2021 20:32:30 -0400 Subject: gl_shader_cache: Fixes for async shaders --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 25 ++++++++++++++++++++-- src/video_core/renderer_opengl/gl_shader_cache.h | 2 ++ 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2ea9c9f07..2d7eb3e33 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -328,11 +328,32 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() { if (is_new) { pipeline = CreateGraphicsPipeline(); } + if (!pipeline) { + return nullptr; + } current_pipeline = pipeline.get(); - if (!pipeline || !pipeline->IsBuilt()) { + return BuiltPipeline(current_pipeline); +} + +GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; + } + if (!use_asynchronous_shaders) { + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { return nullptr; } - return pipeline.get(); + // If games are using a small index count, we can assume these are full screen quads. + // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; + } + return nullptr; } ComputePipeline* ShaderCache::CurrentComputePipeline() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 9d5306293..a34110b37 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -54,6 +54,8 @@ public: private: GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; + std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( -- cgit v1.2.3 From 7277d7fe96d53ae2b73491d91e0a54caf0206fe7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 3 Jul 2021 01:49:59 -0400 Subject: vulkan_device: Blacklist ampere devices from float16 math --- src/video_core/vulkan_common/vulkan_device.cpp | 29 ++++++++++++++++++-------- src/video_core/vulkan_common/vulkan_device.h | 6 +++--- 2 files changed, 23 insertions(+), 12 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e297a3e92..7d66a43e7 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -194,12 +194,22 @@ std::unordered_map GetFormatProperties(vk::Physica return format_properties; } +std::vector GetSupportedExtensions(vk::PhysicalDevice physical) { + const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); + std::vector supported_extensions(std::size(extensions)); + for (const auto& extension : extensions) { + supported_extensions.emplace_back(extension.extensionName); + } + return supported_extensions; +} + } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, - format_properties{GetFormatProperties(physical)} { + supported_extensions{GetSupportedExtensions(physical)}, + format_properties(GetFormatProperties(physical)) { CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); @@ -510,6 +520,13 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectTelemetryParameters(); CollectToolingInfo(); + if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR && is_float16_supported) { + if (std::ranges::find(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != + supported_extensions.end()) { + LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); + is_float16_supported = false; + } + } if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { // Mask driver version variant const u32 version = (properties.driverVersion << 3) >> 3; @@ -778,10 +795,10 @@ std::vector Device::LoadExtensions(bool requires_surface) { bool has_ext_provoking_vertex{}; bool has_ext_vertex_input_dynamic_state{}; bool has_ext_line_rasterization{}; - for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { + for (const std::string& extension : supported_extensions) { const auto test = [&](std::optional> status, const char* name, bool push) { - if (extension.extensionName != std::string_view(name)) { + if (extension != name) { return; } if (push) { @@ -1064,12 +1081,6 @@ void Device::CollectTelemetryParameters() { driver_id = driver.driverID; vendor_name = driver.driverName; - - const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - reported_extensions.reserve(std::size(extensions)); - for (const auto& extension : extensions) { - reported_extensions.emplace_back(extension.extensionName); - } } void Device::CollectPhysicalMemoryInfo() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 26100166f..df394e384 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -301,7 +301,7 @@ public: /// Returns the list of available extensions. const std::vector& GetAvailableExtensions() const { - return reported_extensions; + return supported_extensions; } u64 GetDeviceLocalMemory() const { @@ -398,8 +398,8 @@ private: bool has_nsight_graphics{}; ///< Has Nsight Graphics attached // Telemetry parameters - std::string vendor_name; ///< Device's driver name. - std::vector reported_extensions; ///< Reported Vulkan extensions. + std::string vendor_name; ///< Device's driver name. + std::vector supported_extensions; ///< Reported Vulkan extensions. /// Format properties dictionary. std::unordered_map format_properties; -- cgit v1.2.3 From 55233c2861a72bd777b75bce20c8d4e46c17a72f Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sat, 3 Jul 2021 03:07:50 -0400 Subject: vulkan_device: Add missing include algorithm --- src/video_core/vulkan_common/vulkan_device.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7d66a43e7..ceaee8a7e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include #include -- cgit v1.2.3 From 11f04f1022d0820a1fdba38221ecd38f19d86d9e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 00:34:53 -0400 Subject: shader: Ignore global memory ops on devices lacking int64 support --- .../backend/glsl/emit_context.cpp | 6 ++-- .../backend/glsl/emit_glsl_memory.cpp | 34 ++++++++++++++++---- .../backend/spirv/emit_context.cpp | 2 +- .../backend/spirv/emit_spirv_memory.cpp | 36 ++++++++++++++++++---- src/shader_recompiler/frontend/ir/opcodes.inc | 28 ++++++++--------- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 8 files changed, 79 insertions(+), 30 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 0dcdff152..e08d2d2eb 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -378,7 +378,7 @@ void EmitContext::SetupExtensions() { if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; } - if (info.uses_int64) { + if (info.uses_int64 && profile.support_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } if (info.uses_int64_bit_atomics) { @@ -402,7 +402,7 @@ void EmitContext::SetupExtensions() { info.uses_subgroup_shuffles || info.uses_fswzadd) { header += "#extension GL_ARB_shader_ballot : enable\n" "#extension GL_ARB_shader_group_vote : enable\n"; - if (!info.uses_int64) { + if (!info.uses_int64 && profile.support_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } if (profile.support_gl_warp_intrinsics) { @@ -539,7 +539,7 @@ void EmitContext::DefineHelperFunctions() { if (info.uses_atomic_s32_max) { header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; } - if (info.uses_global_memory) { + if (info.uses_global_memory && profile.support_int64) { header += DefineGlobalMemoryFunctions(); } if (info.loads_indexed_attributes) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index daef5fb84..e3957491f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { @@ -38,15 +39,27 @@ void EmitLoadGlobalS16(EmitContext&) { } void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) { - ctx.AddU32("{}=LoadGlobal32({});", inst, address); + if (ctx.profile.support_int64) { + return ctx.AddU32("{}=LoadGlobal32({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32("{}=0u;", inst); } void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) { - ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); + if (ctx.profile.support_int64) { + return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x2("{}=uvec2(0);", inst); } void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) { - ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); + if (ctx.profile.support_int64) { + return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x4("{}=uvec4(0);", inst); } void EmitWriteGlobalU8(EmitContext&) { @@ -66,15 +79,24 @@ void EmitWriteGlobalS16(EmitContext&) { } void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { - ctx.Add("WriteGlobal32({},{});", address, value); + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal32({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { - ctx.Add("WriteGlobal64({},{});", address, value); + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal64({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { - ctx.Add("WriteGlobal128({},{});", address, value); + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal128({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); } void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 865f34291..2d29d8c14 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -830,7 +830,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { } void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { - if (!info.uses_global_memory) { + if (!info.uses_global_memory || !profile.support_int64) { return; } using DefPtr = Id StorageDefinitions::*; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index ccebf170d..679ee2684 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -84,15 +84,27 @@ void EmitLoadGlobalS16(EmitContext&) { } Id EmitLoadGlobal32(EmitContext& ctx, Id address) { - return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u); } Id EmitLoadGlobal64(EmitContext& ctx, Id address) { - return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u); } Id EmitLoadGlobal128(EmitContext& ctx, Id address) { - return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u, 0u, 0u); } void EmitWriteGlobalU8(EmitContext&) { @@ -112,15 +124,27 @@ void EmitWriteGlobalS16(EmitContext&) { } void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { - ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { - ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { - ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); } Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9af750283..d91098c80 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -71,20 +71,20 @@ OPCODE(UndefU32, U32, OPCODE(UndefU64, U64, ) // Memory operations -OPCODE(LoadGlobalU8, U32, U64, ) -OPCODE(LoadGlobalS8, U32, U64, ) -OPCODE(LoadGlobalU16, U32, U64, ) -OPCODE(LoadGlobalS16, U32, U64, ) -OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, U32x2, U64, ) -OPCODE(LoadGlobal128, U32x4, U64, ) -OPCODE(WriteGlobalU8, Void, U64, U32, ) -OPCODE(WriteGlobalS8, Void, U64, U32, ) -OPCODE(WriteGlobalU16, Void, U64, U32, ) -OPCODE(WriteGlobalS16, Void, U64, U32, ) -OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, U32x2, ) -OPCODE(WriteGlobal128, Void, U64, U32x4, ) +OPCODE(LoadGlobalU8, U32, Opaque, ) +OPCODE(LoadGlobalS8, U32, Opaque, ) +OPCODE(LoadGlobalU16, U32, Opaque, ) +OPCODE(LoadGlobalS16, U32, Opaque, ) +OPCODE(LoadGlobal32, U32, Opaque, ) +OPCODE(LoadGlobal64, U32x2, Opaque, ) +OPCODE(LoadGlobal128, U32x4, Opaque, ) +OPCODE(WriteGlobalU8, Void, Opaque, U32, ) +OPCODE(WriteGlobalS8, Void, Opaque, U32, ) +OPCODE(WriteGlobalU16, Void, Opaque, U32, ) +OPCODE(WriteGlobalS16, Void, Opaque, U32, ) +OPCODE(WriteGlobal32, Void, Opaque, U32, ) +OPCODE(WriteGlobal64, Void, Opaque, U32x2, ) +OPCODE(WriteGlobal128, Void, Opaque, U32x4, ) // Storage buffer operations OPCODE(LoadStorageU8, U32, U32, U32, ) diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 6ff12387b..501dcaf71 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -15,6 +15,7 @@ struct Profile { bool support_descriptor_aliasing{}; bool support_int8{}; bool support_int16{}; + bool support_int64{}; bool support_vertex_instance_id{}; bool support_float_controls{}; bool support_separate_denorm_behavior{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2d7eb3e33..58a4f0fb4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -168,6 +168,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_descriptor_aliasing = false, .support_int8 = false, .support_int16 = false, + .support_int64 = device.HasShaderInt64(), .support_vertex_instance_id = true, .support_float_controls = false, .support_separate_denorm_behavior = false, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 87b843e3d..a2646fc6d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -280,6 +280,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_descriptor_aliasing = true, .support_int8 = true, .support_int16 = device.IsShaderInt16Supported(), + .support_int64 = device.IsShaderInt64Supported(), .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == -- cgit v1.2.3 From be54aad1c40bb50c71e7bcd6465c2fd372c11cb7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 7 Jul 2021 00:18:30 -0300 Subject: maxwell_to_vk: Add R16_SNORM --- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 8f9b9a11a..68a23b602 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -157,7 +157,7 @@ struct FormatTuple { {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM - {VK_FORMAT_UNDEFINED}, // R16_SNORM + {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT {VK_FORMAT_UNDEFINED}, // R16_SINT {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index ceaee8a7e..13d938434 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -116,6 +116,7 @@ std::unordered_map GetFormatProperties(vk::Physica VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R16G16_SINT, VK_FORMAT_R16_UNORM, + VK_FORMAT_R16_SNORM, VK_FORMAT_R16_UINT, VK_FORMAT_R8G8B8A8_SRGB, VK_FORMAT_R8G8_UNORM, -- cgit v1.2.3 From 8390286a89dd259f0ff44cc95fc20d017b58046f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 9 Jul 2021 19:00:11 -0400 Subject: renderers: Disable async shader compilation The current implementation is prone to causing graphical issues. Disable until a better solution is implemented. --- src/video_core/renderer_opengl/gl_device.cpp | 6 ++++-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 99f8769fc..563b291cd 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -182,9 +182,11 @@ Device::Device() { ? Settings::values.shader_backend.GetValue() : Settings::ShaderBackend::GLSL; + // Completely disable async shaders for now, as it causes graphical glitches + use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. - use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && - !(is_amd || (is_intel && !is_linux)); + // use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && + // !(is_amd || (is_intel && !is_linux)); use_driver_cache = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a2646fc6d..39db35175 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -269,7 +269,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, - use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, + use_asynchronous_shaders{false}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; -- cgit v1.2.3 From 41493fbe89200a4a8321dec7b313872435c57df7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Jul 2021 01:04:52 -0400 Subject: renderers: Fix clang formatting --- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 +++++++++---- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++--- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index dab0afe6d..c9cfa6366 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,8 +24,8 @@ #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/textures/decoders.h" diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6fda06a7e..a8d04dc61 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,14 +97,19 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), + telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), + gpu(gpu_), + library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(gpu), scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), + memory_allocator(device, false), + state_tracker(gpu), + scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f0ae0b0d6..18482e1d0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -503,9 +503,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { vertex_attributes.push_back({ .location = static_cast(index), .binding = 0, - .format = type == 1 ? VK_FORMAT_R32_SFLOAT - : type == 2 ? VK_FORMAT_R32_SINT - : VK_FORMAT_R32_UINT, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT, .offset = 0, }); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 99576b826..c7a07fdd8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -141,7 +141,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), - query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); -- cgit v1.2.3 From 49946cf780c317b4c5ccabb52ec433eba01c1970 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Sun, 11 Jul 2021 22:10:38 -0400 Subject: shader_recompiler, video_core: Resolve clang errors Silences the following warnings-turned-errors: -Wsign-conversion -Wunused-private-field -Wbraced-scalar-init -Wunused-variable And some other errors --- src/shader_recompiler/backend/glasm/emit_context.h | 2 +- src/shader_recompiler/backend/glasm/reg_alloc.h | 3 +-- .../backend/glsl/emit_glsl_floating_point.cpp | 2 +- src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 15 ++++++++++----- src/shader_recompiler/frontend/ir/opcodes.h | 3 ++- src/shader_recompiler/frontend/maxwell/control_flow.h | 1 - .../frontend/maxwell/structured_control_flow.cpp | 9 ++------- .../translate/impl/atomic_operations_global_memory.cpp | 12 ++++++------ .../translate/impl/integer_floating_point_conversion.cpp | 4 +++- .../maxwell/translate/impl/load_store_attribute.cpp | 12 ++++++------ .../maxwell/translate/impl/surface_atomic_operations.cpp | 3 --- .../maxwell/translate/impl/surface_load_store.cpp | 8 ++++---- .../ir_opt/global_memory_to_storage_buffer_pass.cpp | 6 +++--- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 4 +--- 14 files changed, 40 insertions(+), 44 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index 1da51a996..8433e5c00 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -59,7 +59,7 @@ public: } std::string code; - RegAlloc reg_alloc{*this}; + RegAlloc reg_alloc{}; const Info& info; const Profile& profile; const RuntimeInfo& runtime_info; diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 5a703daf2..82aec66c6 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -86,7 +86,7 @@ struct ScalarF64 : Value {}; class RegAlloc { public: - RegAlloc(EmitContext& ctx_) : ctx{ctx_} {} + RegAlloc() = default; Register Define(IR::Inst& inst); @@ -142,7 +142,6 @@ private: void Free(Id id); - EmitContext& ctx; size_t num_used_registers{}; size_t num_used_long_registers{}; std::bitset register_use{}; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp index b11be5bd7..2edcf592e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp @@ -22,7 +22,7 @@ void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string } bool IsPrecise(const IR::Inst& inst) { - return {inst.Flags().no_contraction}; + return inst.Flags().no_contraction; } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 647804814..3588f052b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -109,7 +109,7 @@ private: return; } if (offset.IsImmediate()) { - Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(offset.U32())); + Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast(offset.U32()))); return; } IR::Inst* const inst{offset.InstRecursive()}; @@ -117,16 +117,21 @@ private: switch (inst->GetOpcode()) { case IR::Opcode::CompositeConstructU32x2: Add(spv::ImageOperandsMask::ConstOffset, - ctx.SConst(inst->Arg(0).U32(), inst->Arg(1).U32())); + ctx.SConst(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()))); return; case IR::Opcode::CompositeConstructU32x3: Add(spv::ImageOperandsMask::ConstOffset, - ctx.SConst(inst->Arg(0).U32(), inst->Arg(1).U32(), inst->Arg(2).U32())); + ctx.SConst(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()), + static_cast(inst->Arg(2).U32()))); return; case IR::Opcode::CompositeConstructU32x4: Add(spv::ImageOperandsMask::ConstOffset, - ctx.SConst(inst->Arg(0).U32(), inst->Arg(1).U32(), inst->Arg(2).U32(), - inst->Arg(3).U32())); + ctx.SConst(static_cast(inst->Arg(0).U32()), + static_cast(inst->Arg(1).U32()), + static_cast(inst->Arg(2).U32()), + static_cast(inst->Arg(3).U32()))); return; default: break; diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h index 56b001902..9ab108292 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.h +++ b/src/shader_recompiler/frontend/ir/opcodes.h @@ -67,7 +67,8 @@ constexpr OpcodeMeta META_TABLE[]{ }; constexpr size_t CalculateNumArgsOf(Opcode op) { const auto& arg_types{META_TABLE[static_cast(op)].arg_types}; - return std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)); + return static_cast( + std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))); } constexpr u8 NUM_ARGS[]{ diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h index 0e515c3b6..a6bd3e196 100644 --- a/src/shader_recompiler/frontend/maxwell/control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h @@ -161,7 +161,6 @@ private: Environment& env; ObjectPool& block_pool; boost::container::small_vector functions; - FunctionId current_function_id{0}; Location program_start; bool exits_to_dispatcher{}; Block* dispatch_block{}; diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 06fde0017..221454b99 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp @@ -313,9 +313,7 @@ bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { class GotoPass { public: - explicit GotoPass(Flow::CFG& cfg, ObjectPool& inst_pool_, - ObjectPool& block_pool_, ObjectPool& stmt_pool) - : inst_pool{inst_pool_}, block_pool{block_pool_}, pool{stmt_pool} { + explicit GotoPass(Flow::CFG& cfg, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildTree(cfg)}; for (const Node& goto_stmt : gotos | std::views::reverse) { RemoveGoto(goto_stmt); @@ -616,8 +614,6 @@ private: return parent_tree.insert(std::next(loop), *new_goto); } - ObjectPool& inst_pool; - ObjectPool& block_pool; ObjectPool& pool; Statement root_stmt{FunctionTag{}}; }; @@ -864,7 +860,6 @@ private: ObjectPool& block_pool; Environment& env; IR::AbstractSyntaxList& syntax_list; - u32 loop_id{}; // TODO: C++20 Remove this when all compilers support constexpr std::vector #if __cpp_lib_constexpr_vector >= 201907 @@ -878,7 +873,7 @@ private: IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, Flow::CFG& cfg) { ObjectPool stmt_pool{64}; - GotoPass goto_pass{cfg, inst_pool, block_pool, stmt_pool}; + GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp index 66f39e44e..d9f999e05 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -59,14 +59,14 @@ IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, AtomSize size) { static constexpr IR::FpControl f16_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RN}, - .fmz_mode{IR::FmzMode::DontCare}, + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::DontCare, }; static constexpr IR::FpControl f32_control{ - .no_contraction{false}, - .rounding{IR::FpRounding::RN}, - .fmz_mode{IR::FmzMode::FTZ}, + .no_contraction = false, + .rounding = IR::FpRounding::RN, + .fmz_mode = IR::FmzMode::FTZ, }; switch (op) { case AtomOp::ADD: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp index e0e157275..0b8119ddd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp @@ -104,7 +104,9 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { .rounding = CastFpRounding(i2f.fp_rounding), .fmz_mode = IR::FmzMode::DontCare, }; - auto value{v.ir.ConvertIToF(dst_bitsize, conversion_src_bitsize, is_signed, src, fp_control)}; + auto value{v.ir.ConvertIToF(static_cast(dst_bitsize), + static_cast(conversion_src_bitsize), is_signed, src, + fp_control)}; if (i2f.neg != 0) { if (i2f.abs != 0 || !is_signed) { // We know the value is positive diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp index 7d7dcc3cb..924fb7a40 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp @@ -80,10 +80,10 @@ void TranslatorVisitor::ALD(u64 insn) { for (u32 element = 0; element < num_elements; ++element) { if (ald.patch != 0) { const IR::Patch patch{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetPatch(patch)); + F(ald.dest_reg + static_cast(element), ir.GetPatch(patch)); } else { const IR::Attribute attr{offset / 4 + element}; - F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); + F(ald.dest_reg + static_cast(element), ir.GetAttribute(attr, vertex)); } } return; @@ -92,7 +92,7 @@ void TranslatorVisitor::ALD(u64 insn) { throw NotImplementedException("Indirect patch read"); } HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); + F(ald.dest_reg + static_cast(element), ir.GetAttributeIndexed(final_offset, vertex)); }); } @@ -121,10 +121,10 @@ void TranslatorVisitor::AST(u64 insn) { for (u32 element = 0; element < num_elements; ++element) { if (ast.patch != 0) { const IR::Patch patch{offset / 4 + element}; - ir.SetPatch(patch, F(ast.src_reg + element)); + ir.SetPatch(patch, F(ast.src_reg + static_cast(element))); } else { const IR::Attribute attr{offset / 4 + element}; - ir.SetAttribute(attr, F(ast.src_reg + element), vertex); + ir.SetAttribute(attr, F(ast.src_reg + static_cast(element)), vertex); } } return; @@ -133,7 +133,7 @@ void TranslatorVisitor::AST(u64 insn) { throw NotImplementedException("Indexed tessellation patch store"); } HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { - ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); + ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast(element)), vertex); }); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp index 44144f154..63b588ad4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp @@ -69,9 +69,6 @@ TextureType GetType(Type type) { } IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { - const auto array{[&](int index) { - return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); - }}; switch (type) { case Type::_1D: case Type::BUFFER_1D: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp index 7dc793ad7..681220a8d 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp @@ -160,10 +160,10 @@ unsigned SwizzleMask(u64 swizzle) { IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { std::array colors; for (int i = 0; i < num_regs; ++i) { - colors[i] = ir.GetReg(reg + i); + colors[static_cast(i)] = ir.GetReg(reg + i); } for (int i = num_regs; i < 4; ++i) { - colors[i] = ir.Imm32(0); + colors[static_cast(i)] = ir.Imm32(0); } return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); } @@ -211,12 +211,12 @@ void TranslatorVisitor::SULD(u64 insn) { if (is_typed) { const int num_regs{SizeInRegs(suld.size)}; for (int i = 0; i < num_regs; ++i) { - X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast(i))}); } } else { const unsigned mask{SwizzleMask(suld.swizzle)}; const int bits{std::popcount(mask)}; - if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : bits)) { + if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast(bits))) { throw NotImplementedException("Unaligned destination register"); } for (unsigned component = 0; component < 4; ++component) { diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 70449eeca..f9de17b25 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -314,8 +314,8 @@ std::optional Track(const IR::Value& value, const Bias* bias) return std::nullopt; } const StorageBufferAddr storage_buffer{ - .index{index.U32()}, - .offset{offset.U32()}, + .index = index.U32(), + .offset = offset.U32(), }; if (!Common::IsAligned(storage_buffer.offset, 16)) { // The SSBO pointer has to be aligned @@ -484,7 +484,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_index = storage_buffer.index, .cbuf_offset = storage_buffer.offset, .count = 1, - .is_written{info.writes.contains(storage_buffer)}, + .is_written = info.writes.contains(storage_buffer), }); } for (const StorageInst& storage_inst : info.to_replace) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 622267147..2bd48d697 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -104,9 +104,7 @@ public: template static auto MakeConfigureSpecFunc() { - return [](GraphicsPipeline* pipeline, bool is_indexed) { - pipeline->ConfigureImpl(is_indexed); - }; + return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl(is_indexed); }; } private: -- cgit v1.2.3 From 8c166c68d46d160162caa9b588f1e762c57e52f4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Jul 2021 23:26:13 -0300 Subject: gl_shader_cache: Properly implement asynchronous shaders --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 58a4f0fb4..24f035c37 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -318,7 +318,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { SetXfbState(graphics_key.xfb_state, regs); } if (current_pipeline && graphics_key == current_pipeline->Key()) { - return current_pipeline->IsBuilt() ? current_pipeline : nullptr; + return BuiltPipeline(current_pipeline); } return CurrentGraphicsPipelineSlowPath(); } -- cgit v1.2.3 From 94af0a00f67c9f28fcaf170458e55b7a95de76bf Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 12 Jul 2021 02:03:25 -0400 Subject: glsl: Clamp shared mem size to GL_MAX_COMPUTE_SHARED_MEMORY_SIZE --- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 11 +++++++++-- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index ffdc6dbba..c5e819a0a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -218,8 +218,15 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; ctx.header.insert(0, version); if (program.shared_memory_size > 0) { - ctx.header += - fmt::format("shared uint smem[{}];", Common::DivCeil(program.shared_memory_size, 4U)); + const auto requested_size{program.shared_memory_size}; + const auto max_size{profile.gl_max_compute_smem_size}; + const bool needs_clamp{requested_size > max_size}; + if (needs_clamp) { + LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})", + requested_size, max_size); + } + const auto smem_size{needs_clamp ? max_size : requested_size}; + ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U)); } ctx.header += "void main(){\n"; if (program.local_memory_size > 0) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 501dcaf71..f0c3b3b17 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -67,6 +67,8 @@ struct Profile { bool has_gl_precise_bug{}; /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; + + u32 gl_max_compute_smem_size{}; }; } // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 24f035c37..7ecafc862 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -211,6 +211,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, + .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), }, host_info{ .support_float16 = false, -- cgit v1.2.3 From e1ed218b418cd1ed94f6f25ccd0db86b63bd6bb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Jul 2021 03:48:30 -0300 Subject: renderer_opengl: Use ARB_separate_shader_objects Ensures that states set for a particular stage are not attached to other stages which may not need them. --- .../renderer_opengl/gl_compute_pipeline.cpp | 10 +-- .../renderer_opengl/gl_graphics_pipeline.cpp | 62 ++++++------- .../renderer_opengl/gl_graphics_pipeline.h | 2 +- src/video_core/renderer_opengl/gl_shader_manager.h | 100 ++++++++++++++++----- src/video_core/renderer_opengl/gl_shader_util.cpp | 57 ++++++------ src/video_core/renderer_opengl/gl_shader_util.h | 6 +- src/video_core/renderer_opengl/renderer_opengl.cpp | 11 ++- src/video_core/renderer_opengl/renderer_opengl.h | 3 +- src/video_core/renderer_opengl/util_shaders.cpp | 19 ++-- 9 files changed, 154 insertions(+), 116 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index c63e87a56..aa1cc592f 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -46,17 +46,13 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { switch (device.GetShaderBackend()) { case Settings::ShaderBackend::GLSL: - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); - LinkProgram(source_program.handle); + source_program = CreateProgram(code, GL_COMPUTE_SHADER); break; case Settings::ShaderBackend::GLASM: assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); break; case Settings::ShaderBackend::SPIRV: - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code_v); - LinkProgram(source_program.handle); + source_program = CreateProgram(code_v, GL_COMPUTE_SHADER); break; } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), @@ -154,7 +150,7 @@ void ComputePipeline::Configure() { if (assembly_program.handle != 0) { program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { - program_manager.BindProgram(source_program.handle); + program_manager.BindComputeProgram(source_program.handle); } buffer_cache.UnbindComputeTextureBuffers(); size_t texbuf_index{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 1f19b5825..c8b2d833d 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -237,44 +237,32 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{ - [this, device, sources, sources_spirv, shader_notify](ShaderContext::Context*) mutable { - if (!device.UseAssemblyShaders()) { - program.handle = glCreateProgram(); - } - for (size_t stage = 0; stage < 5; ++stage) { - switch (device.GetShaderBackend()) { - case Settings::ShaderBackend::GLSL: { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } - AttachShader(Stage(stage), program.handle, code); - } break; - case Settings::ShaderBackend::GLASM: { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - } break; - case Settings::ShaderBackend::SPIRV: { - const auto code{sources_spirv[stage]}; - if (code.empty()) { - continue; - } - AttachShader(Stage(stage), program.handle, code); - } break; + auto func{[this, device, sources, sources_spirv, + shader_notify](ShaderContext::Context*) mutable { + for (size_t stage = 0; stage < 5; ++stage) { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + if (!sources[stage].empty()) { + source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); } + break; + case Settings::ShaderBackend::GLASM: + if (!sources[stage].empty()) { + assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); + } + break; + case Settings::ShaderBackend::SPIRV: + if (!sources_spirv[stage].empty()) { + source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage)); + } + break; } - if (!device.UseAssemblyShaders()) { - LinkProgram(program.handle); - } - if (shader_notify) { - shader_notify->MarkShaderComplete(); - } - is_built.store(true, std::memory_order_relaxed); - }}; + } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + is_built = true; + }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); } else { @@ -449,7 +437,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (assembly_programs[0].handle != 0) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { - program_manager.BindProgram(program.handle); + program_manager.BindSourcePrograms(source_programs); } const ImageId* views_it{image_view_ids.data()}; GLsizei texture_binding = 0; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 5f5d57385..5e34b9537 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -129,7 +129,7 @@ private: void (*configure_func)(GraphicsPipeline*, bool){}; - OGLProgram program; + std::array source_programs; std::array assembly_programs; u32 enabled_stages_mask{}; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 88b734bcb..d7ef0775d 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -24,34 +24,68 @@ class ProgramManager { public: explicit ProgramManager(const Device& device) { + glCreateProgramPipelines(1, &pipeline.handle); if (device.UseAssemblyShaders()) { glEnable(GL_COMPUTE_PROGRAM_NV); } } - void BindProgram(GLuint program) { - if (current_source_program == program) { - return; - } - current_source_program = program; + void BindComputeProgram(GLuint program) { glUseProgram(program); + is_compute_bound = true; } void BindComputeAssemblyProgram(GLuint program) { - if (current_compute_assembly_program != program) { - current_compute_assembly_program = program; + if (current_assembly_compute_program != program) { + current_assembly_compute_program = program; glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); } - if (current_source_program != 0) { - current_source_program = 0; - glUseProgram(0); + UnbindPipeline(); + } + + void BindSourcePrograms(std::span programs) { + static constexpr std::array stage_enums{ + GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, + GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, + }; + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); + } + } + BindPipeline(); + } + + void BindPresentPrograms(GLuint vertex, GLuint fragment) { + if (current_programs[0] != vertex) { + current_programs[0] = vertex; + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); + } + if (current_programs[4] != fragment) { + current_programs[4] = fragment; + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); + } + glUseProgramStages( + pipeline.handle, + GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); + current_programs[1] = 0; + current_programs[2] = 0; + current_programs[3] = 0; + + if (current_stage_mask != 0) { + current_stage_mask = 0; + for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { + glDisable(program_type); + } } + BindPipeline(); } void BindAssemblyPrograms(std::span programs, u32 stage_mask) { - const u32 changed_mask = current_assembly_mask ^ stage_mask; - current_assembly_mask = stage_mask; + const u32 changed_mask = current_stage_mask ^ stage_mask; + current_stage_mask = stage_mask; if (changed_mask != 0) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { @@ -65,25 +99,47 @@ public: } } for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_assembly_programs[stage] != programs[stage].handle) { - current_assembly_programs[stage] = programs[stage].handle; + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); } } - if (current_source_program != 0) { - current_source_program = 0; - glUseProgram(0); - } + UnbindPipeline(); } void RestoreGuestCompute() {} private: - GLuint current_source_program = 0; + void BindPipeline() { + if (!is_pipeline_bound) { + is_pipeline_bound = true; + glBindProgramPipeline(pipeline.handle); + } + UnbindCompute(); + } + + void UnbindPipeline() { + if (is_pipeline_bound) { + is_pipeline_bound = false; + glBindProgramPipeline(0); + } + UnbindCompute(); + } + + void UnbindCompute() { + if (is_compute_bound) { + is_compute_bound = false; + glUseProgram(0); + } + } + + OGLPipeline pipeline; + bool is_pipeline_bound{}; + bool is_compute_bound{}; - u32 current_assembly_mask = 0; - std::array current_assembly_programs{}; - GLuint current_compute_assembly_program = 0; + u32 current_stage_mask = 0; + std::array current_programs{}; + GLuint current_assembly_compute_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5109985f1..d432072ad 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,6 +13,33 @@ namespace OpenGL { +static OGLProgram LinkSeparableProgram(GLuint shader) { + OGLProgram program; + program.handle = glCreateProgram(); + glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + glAttachShader(program.handle, shader); + glLinkProgram(program.handle); + if (!Settings::values.renderer_debug) { + return program; + } + GLint link_status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status); + + GLint log_length{}; + glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return program; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program.handle, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } + return program; +} + static void LogShader(GLuint shader, std::string_view code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); @@ -36,7 +63,7 @@ static void LogShader(GLuint shader, std::string_view code = {}) { } } -void AttachShader(GLenum stage, GLuint program, std::string_view code) { +OGLProgram CreateProgram(std::string_view code, GLenum stage) { OGLShader shader; shader.handle = glCreateShader(stage); @@ -44,45 +71,23 @@ void AttachShader(GLenum stage, GLuint program, std::string_view code) { const GLchar* const code_ptr = code.data(); glShaderSource(shader.handle, 1, &code_ptr, &length); glCompileShader(shader.handle); - glAttachShader(program, shader.handle); if (Settings::values.renderer_debug) { LogShader(shader.handle, code); } + return LinkSeparableProgram(shader.handle); } -void AttachShader(GLenum stage, GLuint program, std::span code) { +OGLProgram CreateProgram(std::span code, GLenum stage) { OGLShader shader; shader.handle = glCreateShader(stage); glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), static_cast(code.size_bytes())); glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); - glAttachShader(program, shader.handle); if (Settings::values.renderer_debug) { LogShader(shader.handle); } -} - -void LinkProgram(GLuint program) { - glLinkProgram(program); - if (!Settings::values.renderer_debug) { - return; - } - GLint link_status{}; - glGetProgramiv(program, GL_LINK_STATUS, &link_status); - - GLint log_length{}; - glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetProgramInfoLog(program, log_length, nullptr, log.data()); - if (link_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } + return LinkSeparableProgram(shader.handle); } OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index ff5aa024f..4e1a2a8e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -17,11 +17,9 @@ namespace OpenGL { -void AttachShader(GLenum stage, GLuint program, std::string_view code); +OGLProgram CreateProgram(std::string_view code, GLenum stage); -void AttachShader(GLenum stage, GLuint program, std::span code); - -void LinkProgram(GLuint program); +OGLProgram CreateProgram(std::span code, GLenum stage); OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c9cfa6366..d15167e19 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -251,10 +251,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs - present_program.handle = glCreateProgram(); - AttachShader(GL_VERTEX_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_VERT); - AttachShader(GL_FRAGMENT_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_FRAG); - LinkProgram(present_program.handle); + present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); + present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); @@ -340,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - program_manager.BindProgram(present_program.handle); - glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); + program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle); + glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, + ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index b3ee55665..d455f572f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -110,7 +110,8 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram present_program; + OGLProgram present_vertex; + OGLProgram present_fragment; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8aa0683c8..37a4d1d9d 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -42,12 +42,7 @@ using VideoCore::Surface::BytesPerBlock; namespace { OGLProgram MakeProgram(std::string_view source) { - OGLProgram program; - OGLShader shader; - program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, program.handle, source); - LinkProgram(program.handle); - return program; + return CreateProgram(source, GL_COMPUTE_SHADER); } size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { @@ -84,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindProgram(astc_decoder_program.handle); + program_manager.BindComputeProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -132,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -171,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -220,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two images are not implemented"); - program_manager.BindProgram(pitch_unswizzle_program.handle); + program_manager.BindComputeProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -248,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span Date: Sat, 17 Jul 2021 00:59:57 -0400 Subject: gl_device: Simplify GLASM setting logic --- src/video_core/renderer_opengl/gl_device.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 563b291cd..6afe6c1e1 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -172,16 +172,14 @@ Device::Device() { // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - use_assembly_shaders = - Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && - GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && - GLAD_GL_NV_transform_feedback2; - - shader_backend = (Settings::values.shader_backend.GetValue() == - Settings::ShaderBackend::GLASM) == use_assembly_shaders - ? Settings::values.shader_backend.GetValue() - : Settings::ShaderBackend::GLSL; - + shader_backend = Settings::values.shader_backend.GetValue(); + use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM && + GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && + GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { + LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); + shader_backend = Settings::ShaderBackend::GLSL; + } // Completely disable async shaders for now, as it causes graphical glitches use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. @@ -194,11 +192,6 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - - if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { - LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); - } - if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) { LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported"); } -- cgit v1.2.3 From 56478bc9ac5a01ca5c73ba72faae1a5eaae0f8cb Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 17 Jul 2021 16:16:23 -0400 Subject: shader: Fix disabled attribute default values --- src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 2 +- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 756de0a27..fb8c02a77 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -300,7 +300,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const std::optional type{AttrTypes(ctx, index)}; if (!type) { // Attribute is disabled - return ctx.Const(0.0f); + return ctx.Const(element == 3 ? 1.0f : 0.0f); } if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { // Varying component is not written diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index d15167e19..285e78384 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -145,7 +145,7 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, GLint max_attribs{}; glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs); for (GLint attrib = 0; attrib < max_attribs; ++attrib) { - glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 0.0f); + glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f); } // Enable seamless cubemaps when per texture parameters are not available if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { -- cgit v1.2.3 From 258f35515d61d01049d2e433146cab808837bb7d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 18 Jul 2021 21:07:12 -0300 Subject: shader_environment: Receive cache version from outside This allows us invalidating OpenGL and Vulkan separately in the future. --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 10 +++++++--- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 9 ++++++--- src/video_core/shader_environment.cpp | 11 +++++------ src/video_core/shader_environment.h | 9 +++++---- 4 files changed, 23 insertions(+), 16 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7ecafc862..8d6cc074c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -48,9 +48,12 @@ using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +using VideoCommon::LoadPipelines; using VideoCommon::SerializePipeline; using Context = ShaderContext::Context; +constexpr u32 CACHE_VERSION = 5; + template auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); @@ -287,7 +290,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, }); ++state.total; }}; - VideoCommon::LoadPipelines(stop_loading, shader_cache_filename, load_compute, load_graphics); + LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics); std::unique_lock lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); @@ -394,7 +397,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { env_ptrs.push_back(&environments.envs[index]); } } - SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION); return pipeline; } @@ -492,7 +495,8 @@ std::unique_ptr ShaderCache::CreateComputePipeline( if (!pipeline || shader_cache_filename.empty()) { return pipeline; } - SerializePipeline(key, std::array{&env}, shader_cache_filename); + SerializePipeline(key, std::array{&env}, shader_cache_filename, + CACHE_VERSION); return pipeline; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 39db35175..2ce8b4156 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -54,6 +54,8 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +constexpr u32 CACHE_VERSION = 5; + template auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); @@ -434,7 +436,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading }); ++state.total; }}; - VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute, + load_graphics); std::unique_lock lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); @@ -562,7 +565,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env_ptrs.push_back(&envs[index]); } } - SerializePipeline(key, env_ptrs, pipeline_cache_filename); + SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION); }); return pipeline; } @@ -581,7 +584,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( } serialization_thread.QueueWork([this, key, env = std::move(env)] { SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + pipeline_cache_filename, CACHE_VERSION); }); return pipeline; } diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 429cab30d..8a4581c19 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,6 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 5; constexpr size_t INST_SIZE = sizeof(u64); @@ -370,7 +369,7 @@ std::array FileEnvironment::WorkgroupSize() const { } void SerializePipeline(std::span key, std::span envs, - const std::filesystem::path& filename) try { + const std::filesystem::path& filename, u32 cache_version) try { std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); file.exceptions(std::ifstream::failbit); if (!file.is_open()) { @@ -381,7 +380,7 @@ void SerializePipeline(std::span key, std::span(&CACHE_VERSION), sizeof(CACHE_VERSION)); + .write(reinterpret_cast(&cache_version), sizeof(cache_version)); } if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { return; @@ -402,7 +401,7 @@ void SerializePipeline(std::span key, std::span load_compute, Common::UniqueFunction> load_graphics) try { std::ifstream file(filename, std::ios::binary | std::ios::ate); @@ -417,13 +416,13 @@ void LoadPipelines( u32 cache_version; file.read(magic_number.data(), magic_number.size()) .read(reinterpret_cast(&cache_version), sizeof(cache_version)); - if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) { file.close(); if (Common::FS::RemoveFile(filename)) { if (magic_number != MAGIC_NUMBER) { LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); } - if (cache_version != CACHE_VERSION) { + if (cache_version != expected_cache_version) { LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); } } else { diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index d26dbfaab..2079979db 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -164,18 +164,19 @@ private: }; void SerializePipeline(std::span key, std::span envs, - const std::filesystem::path& filename); + const std::filesystem::path& filename, u32 cache_version); template -void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) { +void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename, + u32 cache_version) { static_assert(std::is_trivially_copyable_v); static_assert(std::has_unique_object_representations_v); SerializePipeline(std::span(reinterpret_cast(&key), sizeof(key)), - std::span(envs.data(), envs.size()), filename); + std::span(envs.data(), envs.size()), filename, cache_version); } void LoadPipelines( - std::stop_token stop_loading, const std::filesystem::path& filename, + std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version, Common::UniqueFunction load_compute, Common::UniqueFunction> load_graphics); -- cgit v1.2.3 From 8381490a04f4618ec5be90904815b409e3f4ca59 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 03:05:41 -0300 Subject: opengl: Fix asynchronous shaders Wait for shader to build before configuring it, and wait for the shader to build before sharing it with other contexts. --- .../renderer_opengl/gl_graphics_pipeline.cpp | 30 +++++++++++++++++++--- .../renderer_opengl/gl_graphics_pipeline.h | 7 ++++- 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c8b2d833d..fac0034fb 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -237,10 +237,12 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{[this, device, sources, sources_spirv, - shader_notify](ShaderContext::Context*) mutable { + const bool in_parallel = thread_worker != nullptr; + const auto backend = device.GetShaderBackend(); + auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv), + shader_notify, backend, in_parallel](ShaderContext::Context*) mutable { for (size_t stage = 0; stage < 5; ++stage) { - switch (device.GetShaderBackend()) { + switch (backend) { case Settings::ShaderBackend::GLSL: if (!sources[stage].empty()) { source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); @@ -249,6 +251,10 @@ GraphicsPipeline::GraphicsPipeline( case Settings::ShaderBackend::GLASM: if (!sources[stage].empty()) { assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); + if (in_parallel) { + // Make sure program is built before continuing when building in parallel + glGetString(GL_PROGRAM_ERROR_STRING_NV); + } } break; case Settings::ShaderBackend::SPIRV: @@ -258,10 +264,20 @@ GraphicsPipeline::GraphicsPipeline( break; } } + if (in_parallel && backend != Settings::ShaderBackend::GLASM) { + // Make sure programs have built if we are building shaders in parallel + for (OGLProgram& program : source_programs) { + if (program.handle != 0) { + GLint status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &status); + } + } + } if (shader_notify) { shader_notify->MarkShaderComplete(); } is_built = true; + built_condvar.notify_one(); }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); @@ -434,6 +450,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); + if (!is_built.load(std::memory_order::relaxed)) { + WaitForBuild(); + } if (assembly_programs[0].handle != 0) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { @@ -545,4 +564,9 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { num_xfb_strides = static_cast(current_stream - xfb_streams.data()); } +void GraphicsPipeline::WaitForBuild() { + std::unique_lock lock{built_mutex}; + built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 5e34b9537..4e28d9a42 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -119,6 +119,8 @@ private: void GenerateTransformFeedbackState(); + void WaitForBuild(); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -143,13 +145,16 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; - std::atomic_bool is_built{false}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; GLsizei num_xfb_strides{}; std::array xfb_attribs{}; std::array xfb_streams{}; + + std::mutex built_mutex; + std::condition_variable built_condvar; + std::atomic_bool is_built{false}; }; } // namespace OpenGL -- cgit v1.2.3 From 3c6d440015d7ffb81eedbfcd7ee1aab1ea87ee2a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 03:08:06 -0300 Subject: Revert "renderers: Disable async shader compilation" This reverts commit 4a152767286717fa69bfc94846a124a366f70065. --- src/video_core/renderer_opengl/gl_device.cpp | 6 ++---- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 6afe6c1e1..9692b8e94 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -180,11 +180,9 @@ Device::Device() { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); shader_backend = Settings::ShaderBackend::GLSL; } - // Completely disable async shaders for now, as it causes graphical glitches - use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. - // use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && - // !(is_amd || (is_intel && !is_linux)); + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && + !(is_amd || (is_intel && !is_linux)); use_driver_cache = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2ce8b4156..57b163247 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -271,7 +271,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, - use_asynchronous_shaders{false}, + use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; -- cgit v1.2.3 From f6796cad9c4259aa13aab1f8b2e27392e07432b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 21:51:10 -0300 Subject: vulkan_device: Blacklist Volta and older from VK_KHR_push_descriptor Causes crashes on Link's Awakening intro. It's hard to debug if it's our fault due to bugs in validation layers. --- src/video_core/vulkan_common/vulkan_device.cpp | 43 +++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 13d938434..44afdc1cd 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -34,6 +34,12 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{ }; } // namespace Alternatives +enum class NvidiaArchitecture { + AmpereOrNewer, + Turing, + VoltaOrOlder, +}; + constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_MAINTENANCE1_EXTENSION_NAME, VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, @@ -198,13 +204,34 @@ std::unordered_map GetFormatProperties(vk::Physica std::vector GetSupportedExtensions(vk::PhysicalDevice physical) { const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); - std::vector supported_extensions(std::size(extensions)); + std::vector supported_extensions; + supported_extensions.reserve(extensions.size()); for (const auto& extension : extensions) { supported_extensions.emplace_back(extension.extensionName); } return supported_extensions; } +NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, + std::span exts) { + if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) { + VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; + shading_rate_props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + VkPhysicalDeviceProperties2KHR physical_properties{}; + physical_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + physical_properties.pNext = &shading_rate_props; + physical.GetProperties2KHR(physical_properties); + if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { + // Only Ampere and newer support this feature + return NvidiaArchitecture::AmpereOrNewer; + } + } + if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) { + return NvidiaArchitecture::Turing; + } + return NvidiaArchitecture::VoltaOrOlder; +} } // Anonymous namespace Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR surface, @@ -522,11 +549,19 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectTelemetryParameters(); CollectToolingInfo(); - if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR && is_float16_supported) { - if (std::ranges::find(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != - supported_extensions.end()) { + if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { + const auto arch = GetNvidiaArchitecture(physical, supported_extensions); + switch (arch) { + case NvidiaArchitecture::AmpereOrNewer: LOG_WARNING(Render_Vulkan, "Blacklisting Ampere devices from float16 math"); is_float16_supported = false; + break; + case NvidiaArchitecture::Turing: + break; + case NvidiaArchitecture::VoltaOrOlder: + LOG_WARNING(Render_Vulkan, "Blacklisting Volta and older from VK_KHR_push_descriptor"); + khr_push_descriptor = false; + break; } } if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { -- cgit v1.2.3 From a55ff22900c5261915eb8b88f2c0f18a4eb6f30f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 21:52:29 -0300 Subject: vulkan/blit_image: Commit descriptor sets within worker thread Fixes race condition caused. The descriptor pool is not thread safe, so we have to commit descriptor sets within the same thread. --- src/video_core/renderer_vulkan/blit_image.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 4058f62cd..6c1b2f063 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -376,11 +376,11 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; const VkPipeline pipeline = FindOrEmplacePipeline(key); - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, + src_view](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, @@ -402,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, const VkPipelineLayout layout = *two_textures_pipeline_layout; const VkSampler sampler = *nearest_sampler; const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); - const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); scheduler.RequestRenderpass(dst_framebuffer); scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, - src_stencil_view, descriptor_set, - &device = device](vk::CommandBuffer cmdbuf) { + src_stencil_view, this](vk::CommandBuffer cmdbuf) { // TODO: Barriers + const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, src_stencil_view); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); @@ -448,14 +447,12 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb const VkPipelineLayout layout = *one_texture_pipeline_layout; const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); const VkSampler sampler = *nearest_sampler; - const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); const VkExtent2D extent{ .width = src_image_view.size.width, .height = src_image_view.size.height, }; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, - &device = device](vk::CommandBuffer cmdbuf) { + scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { const VkOffset2D offset{ .x = 0, .y = 0, @@ -476,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb .tex_scale = {viewport.width, viewport.height}, .tex_offset = {0.0f, 0.0f}, }; + const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); // TODO: Barriers -- cgit v1.2.3