From 2930dccecc933d6748772e9f51a5724fe1e6771b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 8 Feb 2021 02:54:35 -0300 Subject: spirv: Initial SPIR-V support --- .../backend/spirv/emit_spirv_floating_point.cpp | 220 +++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp new file mode 100644 index 000000000..9c39537e2 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -0,0 +1,220 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/modifiers.h" + +namespace Shader::Backend::SPIRV { +namespace { +Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { + const auto flags{inst->Flags()}; + if (flags.no_contraction) { + ctx.Decorate(op, spv::Decoration::NoContraction); + } + switch (flags.rounding) { + case IR::FpRounding::RN: + break; + case IR::FpRounding::RM: + ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); + break; + case IR::FpRounding::RP: + ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTP); + break; + case IR::FpRounding::RZ: + ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); + break; + } + if (flags.fmz_mode != IR::FmzMode::FTZ) { + throw NotImplementedException("Denorm management not implemented"); + } + return op; +} + +} // Anonymous namespace + +void EmitSPIRV::EmitFPAbs16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPAbs32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPAbs64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.f16[1], a, b)); +} + +Id EmitSPIRV::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.f32[1], a, b)); +} + +Id EmitSPIRV::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFAdd(ctx.f64[1], a, b)); +} + +Id EmitSPIRV::EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.f16[1], a, b, c)); +} + +Id EmitSPIRV::EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.f32[1], a, b, c)); +} + +Id EmitSPIRV::EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { + return Decorate(ctx, inst, ctx.OpFma(ctx.f64[1], a, b, c)); +} + +void EmitSPIRV::EmitFPMax32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPMax64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPMin32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPMin64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitSPIRV::EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.f16[1], a, b)); +} + +Id EmitSPIRV::EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.f32[1], a, b)); +} + +Id EmitSPIRV::EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + return Decorate(ctx, inst, ctx.OpFMul(ctx.f64[1], a, b)); +} + +void EmitSPIRV::EmitFPNeg16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPNeg32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPNeg64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecip32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecip64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecipSqrt32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRecipSqrt64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSqrt(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSin(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSinNotReduced(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPExp2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPExp2NotReduced(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCos(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCosNotReduced(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPLog2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSaturate16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSaturate32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPSaturate64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRoundEven16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRoundEven32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPRoundEven64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPFloor16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPFloor32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPFloor64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCeil16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCeil32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPCeil64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPTrunc16(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPTrunc32(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +void EmitSPIRV::EmitFPTrunc64(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +} // namespace Shader::Backend::SPIRV -- cgit v1.2.3 From b5d7279d878211654b4abb165d94af763a365f47 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 04:10:22 -0300 Subject: spirv: Initial bindings support --- externals/sirit | 2 +- src/shader_recompiler/CMakeLists.txt | 4 + .../backend/spirv/emit_context.cpp | 160 +++++++++++++++++ src/shader_recompiler/backend/spirv/emit_context.h | 67 ++++++++ src/shader_recompiler/backend/spirv/emit_spirv.cpp | 189 ++++++++++----------- src/shader_recompiler/backend/spirv/emit_spirv.h | 84 +-------- .../spirv/emit_spirv_bitwise_conversion.cpp | 4 +- .../backend/spirv/emit_spirv_composite.cpp | 2 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 20 +-- .../backend/spirv/emit_spirv_control_flow.cpp | 26 +++ .../backend/spirv/emit_spirv_floating_point.cpp | 18 +- .../backend/spirv/emit_spirv_integer.cpp | 16 +- .../backend/spirv/emit_spirv_memory.cpp | 36 +++- .../backend/spirv/emit_spirv_undefined.cpp | 4 +- src/shader_recompiler/frontend/ir/basic_block.h | 16 ++ src/shader_recompiler/frontend/ir/program.h | 2 + src/shader_recompiler/frontend/maxwell/program.cpp | 7 +- .../ir_opt/collect_shader_info_pass.cpp | 81 +++++++++ .../ir_opt/constant_propagation_pass.cpp | 76 +++++++-- .../global_memory_to_storage_buffer_pass.cpp | 110 ++++++------ src/shader_recompiler/ir_opt/passes.h | 4 +- src/shader_recompiler/main.cpp | 4 +- src/shader_recompiler/shader_info.h | 33 +++- 23 files changed, 672 insertions(+), 293 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_context.cpp create mode 100644 src/shader_recompiler/backend/spirv/emit_context.h create mode 100644 src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/externals/sirit b/externals/sirit index f819ade0e..200310e8f 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit f819ade0efe925a782090dea9e1bf300fedffb39 +Subproject commit 200310e8faa756b9869dd6dfc902c255246ac74a diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index e1f4276a1..84be94a8d 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,6 @@ add_executable(shader_recompiler + backend/spirv/emit_context.cpp + backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp backend/spirv/emit_spirv.h backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -75,6 +77,7 @@ add_executable(shader_recompiler frontend/maxwell/translate/impl/move_special_register.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h + ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -84,6 +87,7 @@ add_executable(shader_recompiler ir_opt/verification_pass.cpp main.cpp object_pool.h + shader_info.h ) target_include_directories(video_core PRIVATE sirit) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp new file mode 100644 index 000000000..1c985aff8 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -0,0 +1,160 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include + +#include + +#include "common/common_types.h" +#include "shader_recompiler/backend/spirv/emit_context.h" + +namespace Shader::Backend::SPIRV { + +void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { + defs[0] = sirit_ctx.Name(base_type, name); + + std::array def_name; + for (int i = 1; i < 4; ++i) { + const std::string_view def_name_view( + def_name.data(), + fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); + defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); + } +} + +EmitContext::EmitContext(IR::Program& program) : Sirit::Module(0x00010000) { + AddCapability(spv::Capability::Shader); + DefineCommonTypes(program.info); + DefineCommonConstants(); + DefineSpecialVariables(program.info); + DefineConstantBuffers(program.info); + DefineStorageBuffers(program.info); + DefineLabels(program); +} + +EmitContext::~EmitContext() = default; + +Id EmitContext::Def(const IR::Value& value) { + if (!value.IsImmediate()) { + return value.Inst()->Definition(); + } + switch (value.Type()) { + case IR::Type::U1: + return value.U1() ? true_value : false_value; + case IR::Type::U32: + return Constant(U32[1], value.U32()); + case IR::Type::F32: + return Constant(F32[1], value.F32()); + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} + +void EmitContext::DefineCommonTypes(const Info& info) { + void_id = TypeVoid(); + + U1 = Name(TypeBool(), "u1"); + + F32.Define(*this, TypeFloat(32), "f32"); + U32.Define(*this, TypeInt(32, false), "u32"); + + if (info.uses_fp16) { + AddCapability(spv::Capability::Float16); + F16.Define(*this, TypeFloat(16), "f16"); + } + if (info.uses_fp64) { + AddCapability(spv::Capability::Float64); + F64.Define(*this, TypeFloat(64), "f64"); + } +} + +void EmitContext::DefineCommonConstants() { + true_value = ConstantTrue(U1); + false_value = ConstantFalse(U1); + u32_zero_value = Constant(U32[1], 0U); +} + +void EmitContext::DefineSpecialVariables(const Info& info) { + const auto define{[this](Id type, spv::BuiltIn builtin, spv::StorageClass storage_class) { + const Id pointer_type{TypePointer(storage_class, type)}; + const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::Input)}; + Decorate(id, spv::Decoration::BuiltIn, builtin); + return id; + }}; + using namespace std::placeholders; + const auto define_input{std::bind(define, _1, _2, spv::StorageClass::Input)}; + + if (info.uses_workgroup_id) { + workgroup_id = define_input(U32[3], spv::BuiltIn::WorkgroupId); + } + if (info.uses_local_invocation_id) { + local_invocation_id = define_input(U32[3], spv::BuiltIn::LocalInvocationId); + } +} + +void EmitContext::DefineConstantBuffers(const Info& info) { + if (info.constant_buffer_descriptors.empty()) { + return; + } + const Id array_type{TypeArray(U32[1], Constant(U32[1], 4096))}; + Decorate(array_type, spv::Decoration::ArrayStride, 16U); + + const Id struct_type{TypeStruct(array_type)}; + Name(struct_type, "cbuf_block"); + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "data"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id uniform_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; + uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); + + u32 binding{}; + for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { + const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; + Decorate(id, spv::Decoration::Binding, binding); + Name(id, fmt::format("c{}", desc.index)); + std::fill_n(cbufs.data() + desc.index, desc.count, id); + binding += desc.count; + } +} + +void EmitContext::DefineStorageBuffers(const Info& info) { + if (info.storage_buffers_descriptors.empty()) { + return; + } + AddExtension("SPV_KHR_storage_buffer_storage_class"); + + const Id array_type{TypeRuntimeArray(U32[1])}; + Decorate(array_type, spv::Decoration::ArrayStride, 4U); + + const Id struct_type{TypeStruct(array_type)}; + Name(struct_type, "ssbo_block"); + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "data"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + + const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; + storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); + + u32 binding{}; + for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { + const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; + Decorate(id, spv::Decoration::Binding, binding); + Name(id, fmt::format("ssbo{}", binding)); + std::fill_n(ssbos.data() + binding, desc.count, id); + binding += desc.count; + } +} + +void EmitContext::DefineLabels(IR::Program& program) { + for (const IR::Function& function : program.functions) { + for (IR::Block* const block : function.blocks) { + block->SetDefinition(OpLabel()); + } + } +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h new file mode 100644 index 000000000..c4b84759d --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -0,0 +1,67 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include + +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class VectorTypes { +public: + void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); + + [[nodiscard]] Id operator[](size_t size) const noexcept { + return defs[size - 1]; + } + +private: + std::array defs{}; +}; + +class EmitContext final : public Sirit::Module { +public: + explicit EmitContext(IR::Program& program); + ~EmitContext(); + + [[nodiscard]] Id Def(const IR::Value& value); + + Id void_id{}; + Id U1{}; + VectorTypes F32; + VectorTypes U32; + VectorTypes F16; + VectorTypes F64; + + Id true_value{}; + Id false_value{}; + Id u32_zero_value{}; + + Id uniform_u32{}; + Id storage_u32{}; + + std::array cbufs{}; + std::array ssbos{}; + + Id workgroup_id{}; + Id local_invocation_id{}; + +private: + void DefineCommonTypes(const Info& info); + void DefineCommonConstants(); + void DefineSpecialVariables(const Info& info); + void DefineConstantBuffers(const Info& info); + void DefineStorageBuffers(const Info& info); + void DefineLabels(IR::Program& program); +}; + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 0895414b4..c79c09774 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -12,31 +12,83 @@ #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { +namespace { +template +struct FuncTraits : FuncTraits {}; -EmitContext::EmitContext(IR::Program& program) { - AddCapability(spv::Capability::Shader); - AddCapability(spv::Capability::Float16); - AddCapability(spv::Capability::Float64); - void_id = TypeVoid(); +template +struct FuncTraits { + using ReturnType = ReturnType_; - u1 = Name(TypeBool(), "u1"); - f32.Define(*this, TypeFloat(32), "f32"); - u32.Define(*this, TypeInt(32, false), "u32"); - f16.Define(*this, TypeFloat(16), "f16"); - f64.Define(*this, TypeFloat(64), "f64"); + static constexpr size_t NUM_ARGS = sizeof...(Args); - true_value = ConstantTrue(u1); - false_value = ConstantFalse(u1); + template + using ArgType = std::tuple_element_t>; +}; - for (const IR::Function& function : program.functions) { - for (IR::Block* const block : function.blocks) { - block_label_map.emplace_back(block, OpLabel()); +template +void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) { + const Id forward_id{inst->Definition()}; + const bool has_forward_id{Sirit::ValidId(forward_id)}; + Id current_id{}; + if (has_forward_id) { + current_id = ctx.ExchangeCurrentId(forward_id); + } + const Id new_id{(emit.*method)(ctx, std::forward(args)...)}; + if (has_forward_id) { + ctx.ExchangeCurrentId(current_id); + } else { + inst->SetDefinition(new_id); + } +} + +template +ArgType Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v) { + return ctx.Def(arg); + } else if constexpr (std::is_same_v) { + return arg; + } else if constexpr (std::is_same_v) { + return arg.U32(); + } else if constexpr (std::is_same_v) { + return arg.Label(); + } +} + +template +void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, std::index_sequence) { + using Traits = FuncTraits; + if constexpr (std::is_same_v) { + if constexpr (is_first_arg_inst) { + SetDefinition(emit, ctx, inst, inst, + Arg>(ctx, inst->Arg(I))...); + } else { + SetDefinition(emit, ctx, inst, + Arg>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + (emit.*method)(ctx, inst, Arg>(ctx, inst->Arg(I))...); + } else { + (emit.*method)(ctx, Arg>(ctx, inst->Arg(I))...); } } - std::ranges::sort(block_label_map, {}, &std::pair::first); } -EmitContext::~EmitContext() = default; +template +void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke(emit, ctx, inst, std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v; + using Indices = std::make_index_sequence; + Invoke(emit, ctx, inst, Indices{}); + } +} +} // Anonymous namespace EmitSPIRV::EmitSPIRV(IR::Program& program) { EmitContext ctx{program}; @@ -46,74 +98,32 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) { for (IR::Function& function : program.functions) { func = ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function); for (IR::Block* const block : function.blocks) { - ctx.AddLabel(ctx.BlockLabel(block)); + ctx.AddLabel(block->Definition()); for (IR::Inst& inst : block->Instructions()) { EmitInst(ctx, &inst); } } ctx.OpFunctionEnd(); } - ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main"); + boost::container::small_vector interfaces; + if (program.info.uses_workgroup_id) { + interfaces.push_back(ctx.workgroup_id); + } + if (program.info.uses_local_invocation_id) { + interfaces.push_back(ctx.local_invocation_id); + } + + const std::span interfaces_span(interfaces.data(), interfaces.size()); + ctx.AddEntryPoint(spv::ExecutionModel::Fragment, func, "main", interfaces_span); + ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); std::vector result{ctx.Assemble()}; - std::FILE* file{std::fopen("shader.spv", "wb")}; + std::FILE* file{std::fopen("D:\\shader.spv", "wb")}; std::fwrite(result.data(), sizeof(u32), result.size(), file); std::fclose(file); - std::system("spirv-dis shader.spv"); - std::system("spirv-val shader.spv"); - std::system("spirv-cross shader.spv"); -} - -template -static void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) { - const Id forward_id{inst->Definition()}; - const bool has_forward_id{Sirit::ValidId(forward_id)}; - Id current_id{}; - if (has_forward_id) { - current_id = ctx.ExchangeCurrentId(forward_id); - } - const Id new_id{(emit.*method)(ctx, std::forward(args)...)}; - if (has_forward_id) { - ctx.ExchangeCurrentId(current_id); - } else { - inst->SetDefinition(new_id); - } -} - -template -static void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { - using M = decltype(method); - using std::is_invocable_r_v; - if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0))); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), - ctx.Def(inst->Arg(2))); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, inst); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1))); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, inst, ctx.Def(inst->Arg(0)), ctx.Def(inst->Arg(1)), - ctx.Def(inst->Arg(2))); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, ctx.Def(inst->Arg(0)), inst->Arg(1).U32()); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, inst->Arg(0)); - } else if constexpr (is_invocable_r_v) { - SetDefinition(emit, ctx, inst, inst->Arg(0), inst->Arg(1)); - } else if constexpr (is_invocable_r_v) { - (emit.*method)(ctx, inst); - } else if constexpr (is_invocable_r_v) { - (emit.*method)(ctx); - } else { - static_assert(false, "Bad format"); - } + std::system("spirv-dis D:\\shader.spv") == 0 && + std::system("spirv-val --uniform-buffer-standard-layout D:\\shader.spv") == 0 && + std::system("spirv-cross -V D:\\shader.spv") == 0; } void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { @@ -130,9 +140,9 @@ void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { static Id TypeId(const EmitContext& ctx, IR::Type type) { switch (type) { case IR::Type::U1: - return ctx.u1; + return ctx.U1; case IR::Type::U32: - return ctx.u32[1]; + return ctx.U32[1]; default: throw NotImplementedException("Phi node type {}", type); } @@ -162,7 +172,7 @@ Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { } IR::Block* const phi_block{inst->PhiBlock(index)}; operands.push_back(def); - operands.push_back(ctx.BlockLabel(phi_block)); + operands.push_back(phi_block->Definition()); } const Id result_type{TypeId(ctx, inst->Arg(0).Type())}; return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); @@ -174,29 +184,6 @@ void EmitSPIRV::EmitIdentity(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -// FIXME: Move to its own file -void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Inst* inst) { - ctx.OpBranch(ctx.BlockLabel(inst->Arg(0).Label())); -} - -void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, IR::Inst* inst) { - ctx.OpBranchConditional(ctx.Def(inst->Arg(0)), ctx.BlockLabel(inst->Arg(1).Label()), - ctx.BlockLabel(inst->Arg(2).Label())); -} - -void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Inst* inst) { - ctx.OpLoopMerge(ctx.BlockLabel(inst->Arg(0).Label()), ctx.BlockLabel(inst->Arg(1).Label()), - spv::LoopControlMask::MaskNone); -} - -void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst) { - ctx.OpSelectionMerge(ctx.BlockLabel(inst->Arg(0).Label()), spv::SelectionControlMask::MaskNone); -} - -void EmitSPIRV::EmitReturn(EmitContext& ctx) { - ctx.OpReturn(); -} - void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 7d76377b5..a5d0e1ec0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -7,82 +7,12 @@ #include #include "common/common_types.h" +#include "shader_recompiler/backend/spirv/emit_context.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { -using Sirit::Id; - -class VectorTypes { -public: - void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { - defs[0] = sirit_ctx.Name(base_type, name); - - std::array def_name; - for (int i = 1; i < 4; ++i) { - const std::string_view def_name_view( - def_name.data(), - fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); - defs[i] = sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); - } - } - - [[nodiscard]] Id operator[](size_t size) const noexcept { - return defs[size - 1]; - } - -private: - std::array defs; -}; - -class EmitContext final : public Sirit::Module { -public: - explicit EmitContext(IR::Program& program); - ~EmitContext(); - - [[nodiscard]] Id Def(const IR::Value& value) { - if (!value.IsImmediate()) { - return value.Inst()->Definition(); - } - switch (value.Type()) { - case IR::Type::U1: - return value.U1() ? true_value : false_value; - case IR::Type::U32: - return Constant(u32[1], value.U32()); - case IR::Type::F32: - return Constant(f32[1], value.F32()); - default: - throw NotImplementedException("Immediate type {}", value.Type()); - } - } - - [[nodiscard]] Id BlockLabel(IR::Block* block) const { - const auto it{std::ranges::lower_bound(block_label_map, block, {}, - &std::pair::first)}; - if (it == block_label_map.end()) { - throw LogicError("Undefined block"); - } - return it->second; - } - - Id void_id{}; - Id u1{}; - VectorTypes f32; - VectorTypes u32; - VectorTypes f16; - VectorTypes f64; - - Id true_value{}; - Id false_value{}; - - Id workgroup_id{}; - Id local_invocation_id{}; - -private: - std::vector> block_label_map; -}; - class EmitSPIRV { public: explicit EmitSPIRV(IR::Program& program); @@ -94,10 +24,11 @@ private: Id EmitPhi(EmitContext& ctx, IR::Inst* inst); void EmitVoid(EmitContext& ctx); void EmitIdentity(EmitContext& ctx); - void EmitBranch(EmitContext& ctx, IR::Inst* inst); - void EmitBranchConditional(EmitContext& ctx, IR::Inst* inst); - void EmitLoopMerge(EmitContext& ctx, IR::Inst* inst); - void EmitSelectionMerge(EmitContext& ctx, IR::Inst* inst); + void EmitBranch(EmitContext& ctx, IR::Block* label); + void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, + IR::Block* false_label); + void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); + void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); void EmitReturn(EmitContext& ctx); void EmitGetRegister(EmitContext& ctx); void EmitSetRegister(EmitContext& ctx); @@ -150,7 +81,8 @@ private: void EmitWriteStorageS8(EmitContext& ctx); void EmitWriteStorageU16(EmitContext& ctx); void EmitWriteStorageS16(EmitContext& ctx); - void EmitWriteStorage32(EmitContext& ctx); + void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); void EmitWriteStorage64(EmitContext& ctx); void EmitWriteStorage128(EmitContext& ctx); void EmitCompositeConstructU32x2(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 447df5b8c..af82df99c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -11,7 +11,7 @@ void EmitSPIRV::EmitBitCastU16F16(EmitContext&) { } Id EmitSPIRV::EmitBitCastU32F32(EmitContext& ctx, Id value) { - return ctx.OpBitcast(ctx.u32[1], value); + return ctx.OpBitcast(ctx.U32[1], value); } void EmitSPIRV::EmitBitCastU64F64(EmitContext&) { @@ -23,7 +23,7 @@ void EmitSPIRV::EmitBitCastF16U16(EmitContext&) { } Id EmitSPIRV::EmitBitCastF32U32(EmitContext& ctx, Id value) { - return ctx.OpBitcast(ctx.f32[1], value); + return ctx.OpBitcast(ctx.F32[1], value); } void EmitSPIRV::EmitBitCastF64U64(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index b190cf876..a7374c89d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -23,7 +23,7 @@ void EmitSPIRV::EmitCompositeExtractU32x2(EmitContext&) { } Id EmitSPIRV::EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { - return ctx.OpCompositeExtract(ctx.u32[1], vector, index); + return ctx.OpCompositeExtract(ctx.U32[1], vector, index); } void EmitSPIRV::EmitCompositeExtractU32x4(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 1eab739ed..f4c9970eb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -37,7 +37,10 @@ Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR:: if (!offset.IsImmediate()) { throw NotImplementedException("Variable constant buffer offset"); } - return ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_cbuf"); + const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / 4)}; + const Id cbuf{ctx.cbufs[binding.U32()]}; + const Id access_chain{ctx.OpAccessChain(ctx.uniform_u32, cbuf, ctx.u32_zero_value, imm_offset)}; + return ctx.OpLoad(ctx.U32[1], access_chain); } void EmitSPIRV::EmitGetAttribute(EmitContext&) { @@ -89,22 +92,11 @@ void EmitSPIRV::EmitSetOFlag(EmitContext&) { } Id EmitSPIRV::EmitWorkgroupId(EmitContext& ctx) { - if (ctx.workgroup_id.value == 0) { - ctx.workgroup_id = ctx.AddGlobalVariable( - ctx.TypePointer(spv::StorageClass::Input, ctx.u32[3]), spv::StorageClass::Input); - ctx.Decorate(ctx.workgroup_id, spv::Decoration::BuiltIn, spv::BuiltIn::WorkgroupId); - } - return ctx.OpLoad(ctx.u32[3], ctx.workgroup_id); + return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); } Id EmitSPIRV::EmitLocalInvocationId(EmitContext& ctx) { - if (ctx.local_invocation_id.value == 0) { - ctx.local_invocation_id = ctx.AddGlobalVariable( - ctx.TypePointer(spv::StorageClass::Input, ctx.u32[3]), spv::StorageClass::Input); - ctx.Decorate(ctx.local_invocation_id, spv::Decoration::BuiltIn, - spv::BuiltIn::LocalInvocationId); - } - return ctx.OpLoad(ctx.u32[3], ctx.local_invocation_id); + return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 66ce6c8c5..549c1907a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -3,3 +3,29 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Block* label) { + ctx.OpBranch(label->Definition()); +} + +void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, + IR::Block* false_label) { + ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); +} + +void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { + ctx.OpLoopMerge(merge_label->Definition(), continue_label->Definition(), + spv::LoopControlMask::MaskNone); +} + +void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { + ctx.OpSelectionMerge(merge_label->Definition(), spv::SelectionControlMask::MaskNone); +} + +void EmitSPIRV::EmitReturn(EmitContext& ctx) { + ctx.OpReturn(); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 9c39537e2..c9bc121f8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -46,27 +46,27 @@ void EmitSPIRV::EmitFPAbs64(EmitContext&) { } Id EmitSPIRV::EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFAdd(ctx.f16[1], a, b)); + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); } Id EmitSPIRV::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFAdd(ctx.f32[1], a, b)); + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); } Id EmitSPIRV::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFAdd(ctx.f64[1], a, b)); + return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); } Id EmitSPIRV::EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { - return Decorate(ctx, inst, ctx.OpFma(ctx.f16[1], a, b, c)); + return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); } Id EmitSPIRV::EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { - return Decorate(ctx, inst, ctx.OpFma(ctx.f32[1], a, b, c)); + return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); } Id EmitSPIRV::EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { - return Decorate(ctx, inst, ctx.OpFma(ctx.f64[1], a, b, c)); + return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); } void EmitSPIRV::EmitFPMax32(EmitContext&) { @@ -86,15 +86,15 @@ void EmitSPIRV::EmitFPMin64(EmitContext&) { } Id EmitSPIRV::EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFMul(ctx.f16[1], a, b)); + return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); } Id EmitSPIRV::EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFMul(ctx.f32[1], a, b)); + return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); } Id EmitSPIRV::EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - return Decorate(ctx, inst, ctx.OpFMul(ctx.f64[1], a, b)); + return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } void EmitSPIRV::EmitFPNeg16(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index e811a63ab..32af94a73 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -10,7 +10,7 @@ Id EmitSPIRV::EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { if (inst->HasAssociatedPseudoOperation()) { throw NotImplementedException("Pseudo-operations on IAdd32"); } - return ctx.OpIAdd(ctx.u32[1], a, b); + return ctx.OpIAdd(ctx.U32[1], a, b); } void EmitSPIRV::EmitIAdd64(EmitContext&) { @@ -18,7 +18,7 @@ void EmitSPIRV::EmitIAdd64(EmitContext&) { } Id EmitSPIRV::EmitISub32(EmitContext& ctx, Id a, Id b) { - return ctx.OpISub(ctx.u32[1], a, b); + return ctx.OpISub(ctx.U32[1], a, b); } void EmitSPIRV::EmitISub64(EmitContext&) { @@ -26,7 +26,7 @@ void EmitSPIRV::EmitISub64(EmitContext&) { } Id EmitSPIRV::EmitIMul32(EmitContext& ctx, Id a, Id b) { - return ctx.OpIMul(ctx.u32[1], a, b); + return ctx.OpIMul(ctx.U32[1], a, b); } void EmitSPIRV::EmitINeg32(EmitContext&) { @@ -38,7 +38,7 @@ void EmitSPIRV::EmitIAbs32(EmitContext&) { } Id EmitSPIRV::EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { - return ctx.OpShiftLeftLogical(ctx.u32[1], base, shift); + return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } void EmitSPIRV::EmitShiftRightLogical32(EmitContext&) { @@ -70,11 +70,11 @@ void EmitSPIRV::EmitBitFieldSExtract(EmitContext&) { } Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { - return ctx.OpBitFieldUExtract(ctx.u32[1], base, offset, count); + return ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count); } Id EmitSPIRV::EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpSLessThan(ctx.u1, lhs, rhs); + return ctx.OpSLessThan(ctx.U1, lhs, rhs); } void EmitSPIRV::EmitULessThan(EmitContext&) { @@ -94,7 +94,7 @@ void EmitSPIRV::EmitULessThanEqual(EmitContext&) { } Id EmitSPIRV::EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpSGreaterThan(ctx.u1, lhs, rhs); + return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); } void EmitSPIRV::EmitUGreaterThan(EmitContext&) { @@ -110,7 +110,7 @@ void EmitSPIRV::EmitSGreaterThanEqual(EmitContext&) { } Id EmitSPIRV::EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpUGreaterThanEqual(ctx.u1, lhs, rhs); + return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); } void EmitSPIRV::EmitLogicalOr(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 21a0d72fa..5769a3c95 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -2,10 +2,26 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "shader_recompiler/backend/spirv/emit_spirv.h" namespace Shader::Backend::SPIRV { +static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { + if (offset.IsImmediate()) { + const u32 imm_offset{static_cast(offset.U32() / element_size)}; + return ctx.Constant(ctx.U32[1], imm_offset); + } + const u32 shift{static_cast(std::countr_zero(element_size))}; + const Id index{ctx.Def(offset)}; + if (shift == 0) { + return index; + } + const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); +} + void EmitSPIRV::EmitLoadGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } @@ -79,11 +95,14 @@ void EmitSPIRV::EmitLoadStorageS16(EmitContext&) { } Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset) { + const IR::Value& offset) { if (!binding.IsImmediate()) { - throw NotImplementedException("Storage buffer indexing"); + throw NotImplementedException("Dynamic storage buffer indexing"); } - return ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_sbuf"); + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; + return ctx.OpLoad(ctx.U32[1], pointer); } void EmitSPIRV::EmitLoadStorage64(EmitContext&) { @@ -110,8 +129,15 @@ void EmitSPIRV::EmitWriteStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx) { - ctx.Name(ctx.OpUndef(ctx.u32[1]), "unimplemented_sbuf_store"); +void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, Id value) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)}; + ctx.OpStore(pointer, value); } void EmitSPIRV::EmitWriteStorage64(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index a6f542360..c1ed8f281 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -7,7 +7,7 @@ namespace Shader::Backend::SPIRV { Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { - return ctx.OpUndef(ctx.u1); + return ctx.OpUndef(ctx.U1); } Id EmitSPIRV::EmitUndefU8(EmitContext&) { @@ -19,7 +19,7 @@ Id EmitSPIRV::EmitUndefU16(EmitContext&) { } Id EmitSPIRV::EmitUndefU32(EmitContext& ctx) { - return ctx.OpUndef(ctx.u32[1]); + return ctx.OpUndef(ctx.U32[1]); } Id EmitSPIRV::EmitUndefU64(EmitContext&) { diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h index 778b32e43..b14a35ec5 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.h +++ b/src/shader_recompiler/frontend/ir/basic_block.h @@ -11,6 +11,7 @@ #include +#include "common/bit_cast.h" #include "shader_recompiler/frontend/ir/condition.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/value.h" @@ -68,6 +69,18 @@ public: /// Gets an immutable span to the immediate predecessors. [[nodiscard]] std::span ImmediatePredecessors() const noexcept; + /// Intrusively store the host definition of this instruction. + template + void SetDefinition(DefinitionType def) { + definition = Common::BitCast(def); + } + + /// Return the intrusively stored host definition of this instruction. + template + [[nodiscard]] DefinitionType Definition() const noexcept { + return Common::BitCast(definition); + } + [[nodiscard]] Condition BranchCondition() const noexcept { return branch_cond; } @@ -161,6 +174,9 @@ private: Block* branch_false{nullptr}; /// Block immediate predecessors std::vector imm_predecessors; + + /// Intrusively stored host definition of this block. + u32 definition{}; }; using BlockList = std::vector; diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index efaf1aa1e..98aab2dc6 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -9,11 +9,13 @@ #include #include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/shader_info.h" namespace Shader::IR { struct Program { boost::container::small_vector functions; + Info info; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index dab6d68c0..8331d576c 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -53,21 +53,22 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPoolInstructions()) { + Visit(info, inst); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index cbde65b9b..f1ad16d60 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -77,6 +77,16 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { return true; } +template +bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { + if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { + return false; + } + using Indices = std::make_index_sequence::NUM_ARGS>; + inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); + return true; +} + void FoldGetRegister(IR::Inst& inst) { if (inst.Arg(0).Reg() == IR::Reg::RZ) { inst.ReplaceUsesWith(IR::Value{u32{0}}); @@ -103,6 +113,52 @@ void FoldAdd(IR::Inst& inst) { } } +void FoldISub32(IR::Inst& inst) { + if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) { + return; + } + if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) { + return; + } + // ISub32 is generally used to subtract two constant buffers, compare and replace this with + // zero if they equal. + const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { + return a->Opcode() == IR::Opcode::GetCbuf && b->Opcode() == IR::Opcode::GetCbuf && + a->Arg(0) == b->Arg(0) && a->Arg(1) == b->Arg(1); + }}; + IR::Inst* op_a{inst.Arg(0).InstRecursive()}; + IR::Inst* op_b{inst.Arg(1).InstRecursive()}; + if (equal_cbuf(op_a, op_b)) { + inst.ReplaceUsesWith(IR::Value{u32{0}}); + return; + } + // It's also possible a value is being added to a cbuf and then subtracted + if (op_b->Opcode() == IR::Opcode::IAdd32) { + // Canonicalize local variables to simplify the following logic + std::swap(op_a, op_b); + } + if (op_b->Opcode() != IR::Opcode::GetCbuf) { + return; + } + IR::Inst* const inst_cbuf{op_b}; + if (op_a->Opcode() != IR::Opcode::IAdd32) { + return; + } + IR::Value add_op_a{op_a->Arg(0)}; + IR::Value add_op_b{op_a->Arg(1)}; + if (add_op_b.IsImmediate()) { + // Canonicalize + std::swap(add_op_a, add_op_b); + } + if (add_op_b.IsImmediate()) { + return; + } + IR::Inst* const add_cbuf{add_op_b.InstRecursive()}; + if (equal_cbuf(add_cbuf, inst_cbuf)) { + inst.ReplaceUsesWith(add_op_a); + } +} + template void FoldSelect(IR::Inst& inst) { const IR::Value cond{inst.Arg(0)}; @@ -170,15 +226,6 @@ IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence< return IR::Value{func(Arg>(inst.Arg(I))...)}; } -template -void FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { - if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { - return; - } - using Indices = std::make_index_sequence::NUM_ARGS>; - inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); -} - void FoldBranchConditional(IR::Inst& inst) { const IR::U1 cond{inst.Arg(0)}; if (cond.IsImmediate()) { @@ -205,6 +252,8 @@ void ConstantPropagation(IR::Inst& inst) { return FoldGetPred(inst); case IR::Opcode::IAdd32: return FoldAdd(inst); + case IR::Opcode::ISub32: + return FoldISub32(inst); case IR::Opcode::BitCastF32U32: return FoldBitCast(inst, IR::Opcode::BitCastU32F32); case IR::Opcode::BitCastU32F32: @@ -220,17 +269,20 @@ void ConstantPropagation(IR::Inst& inst) { case IR::Opcode::LogicalNot: return FoldLogicalNot(inst); case IR::Opcode::SLessThan: - return FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); + FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); + return; case IR::Opcode::ULessThan: - return FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); + return; case IR::Opcode::BitFieldUExtract: - return FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { + FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { if (static_cast(shift) + static_cast(count) > Common::BitSize()) { throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, base, shift, count); } return (base >> shift) & ((1U << count) - 1); }); + return; case IR::Opcode::BranchConditional: return FoldBranchConditional(inst); default: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index b40c0c57b..bf230a850 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -28,7 +28,8 @@ struct StorageBufferAddr { /// Block iterator to a global memory instruction and the storage buffer it uses struct StorageInst { StorageBufferAddr storage_buffer; - IR::Block::iterator inst; + IR::Inst* inst; + IR::Block* block; }; /// Bias towards a certain range of constant buffers when looking for storage buffers @@ -41,7 +42,7 @@ struct Bias { using StorageBufferSet = boost::container::flat_set, boost::container::small_vector>; -using StorageInstVector = boost::container::small_vector; +using StorageInstVector = boost::container::small_vector; /// Returns true when the instruction is a global memory instruction bool IsGlobalMemory(const IR::Inst& inst) { @@ -109,23 +110,22 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce } /// Discards a global memory operation, reads return zero and writes are ignored -void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { +void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::Value zero{u32{0}}; - switch (inst->Opcode()) { + switch (inst.Opcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: case IR::Opcode::LoadGlobalU16: case IR::Opcode::LoadGlobal32: - inst->ReplaceUsesWith(zero); + inst.ReplaceUsesWith(zero); break; case IR::Opcode::LoadGlobal64: - inst->ReplaceUsesWith(IR::Value{ - &*block.PrependNewInst(inst, IR::Opcode::CompositeConstructU32x2, {zero, zero})}); + inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)}); break; case IR::Opcode::LoadGlobal128: - inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst( - inst, IR::Opcode::CompositeConstructU32x4, {zero, zero, zero, zero})}); + inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)}); break; case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalU8: @@ -134,11 +134,10 @@ void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) { case IR::Opcode::WriteGlobal32: case IR::Opcode::WriteGlobal64: case IR::Opcode::WriteGlobal128: - inst->Invalidate(); + inst.Invalidate(); break; default: - throw LogicError("Invalid opcode to discard its global memory operation {}", - inst->Opcode()); + throw LogicError("Invalid opcode to discard its global memory operation {}", inst.Opcode()); } } @@ -232,8 +231,8 @@ std::optional Track(const IR::Value& value, const Bias* bias) } /// Collects the storage buffer used by a global memory instruction and the instruction itself -void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, - StorageBufferSet& storage_buffer_set, StorageInstVector& to_replace) { +void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, + StorageInstVector& to_replace) { // NVN puts storage buffers in a specific range, we have to bias towards these addresses to // avoid getting false positives static constexpr Bias nvn_bias{ @@ -241,19 +240,13 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, .offset_begin{0x110}, .offset_end{0x610}, }; - // First try to find storage buffers in the NVN address - const IR::U64 addr{inst->Arg(0)}; - if (addr.IsImmediate()) { - // Immediate addresses can't be lowered to a storage buffer - DiscardGlobalMemory(block, inst); - return; - } // Track the low address of the instruction - const std::optional low_addr_info{TrackLowAddress(addr.InstRecursive())}; + const std::optional low_addr_info{TrackLowAddress(&inst)}; if (!low_addr_info) { DiscardGlobalMemory(block, inst); return; } + // First try to find storage buffers in the NVN address const IR::U32 low_addr{low_addr_info->value}; std::optional storage_buffer{Track(low_addr, &nvn_bias)}; if (!storage_buffer) { @@ -269,21 +262,22 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst, storage_buffer_set.insert(*storage_buffer); to_replace.push_back(StorageInst{ .storage_buffer{*storage_buffer}, - .inst{inst}, + .inst{&inst}, + .block{&block}, }); } /// Returns the offset in indices (not bytes) for an equivalent storage instruction -IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferAddr buffer) { - IR::IREmitter ir{block, inst}; +IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::U32 offset; - if (const std::optional low_addr{TrackLowAddress(&*inst)}) { + if (const std::optional low_addr{TrackLowAddress(&inst)}) { offset = low_addr->value; if (low_addr->imm_offset != 0) { offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); } } else { - offset = ir.ConvertU(32, IR::U64{inst->Arg(0)}); + offset = ir.ConvertU(32, IR::U64{inst.Arg(0)}); } // Subtract the least significant 32 bits from the guest offset. The result is the storage // buffer offset in bytes. @@ -292,25 +286,27 @@ IR::U32 StorageOffset(IR::Block& block, IR::Block::iterator inst, StorageBufferA } /// Replace a global memory load instruction with its storage buffer equivalent -void ReplaceLoad(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; - const IR::Value value{&*block.PrependNewInst(inst, new_opcode, {storage_index, offset})}; - inst->ReplaceUsesWith(value); + const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; + inst.ReplaceUsesWith(value); } /// Replace a global memory write instruction with its storage buffer equivalent -void ReplaceWrite(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - const IR::Opcode new_opcode{GlobalToStorage(inst->Opcode())}; - block.PrependNewInst(inst, new_opcode, {storage_index, offset, inst->Arg(1)}); - inst->Invalidate(); + const IR::Opcode new_opcode{GlobalToStorage(inst.Opcode())}; + const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; + block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); + inst.Invalidate(); } /// Replace a global memory instruction with its storage buffer equivalent -void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_index, +void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, const IR::U32& offset) { - switch (inst->Opcode()) { + switch (inst.Opcode()) { case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalU8: case IR::Opcode::LoadGlobalS16: @@ -328,26 +324,44 @@ void Replace(IR::Block& block, IR::Block::iterator inst, const IR::U32& storage_ case IR::Opcode::WriteGlobal128: return ReplaceWrite(block, inst, storage_index, offset); default: - throw InvalidArgument("Invalid global memory opcode {}", inst->Opcode()); + throw InvalidArgument("Invalid global memory opcode {}", inst.Opcode()); } } } // Anonymous namespace -void GlobalMemoryToStorageBufferPass(IR::Block& block) { +void GlobalMemoryToStorageBufferPass(IR::Program& program) { StorageBufferSet storage_buffers; StorageInstVector to_replace; - for (IR::Block::iterator inst{block.begin()}; inst != block.end(); ++inst) { - if (!IsGlobalMemory(*inst)) { - continue; + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (!IsGlobalMemory(inst)) { + continue; + } + CollectStorageBuffers(*block, inst, storage_buffers, to_replace); + } } - CollectStorageBuffers(block, inst, storage_buffers, to_replace); } - for (const auto [storage_buffer, inst] : to_replace) { - const auto it{storage_buffers.find(storage_buffer)}; - const IR::U32 storage_index{IR::Value{static_cast(storage_buffers.index_of(it))}}; - const IR::U32 offset{StorageOffset(block, inst, storage_buffer)}; - Replace(block, inst, storage_index, offset); + Info& info{program.info}; + u32 storage_index{}; + for (const StorageBufferAddr& storage_buffer : storage_buffers) { + info.storage_buffers_descriptors.push_back({ + .cbuf_index{storage_buffer.index}, + .cbuf_offset{storage_buffer.offset}, + .count{1}, + }); + info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); + ++storage_index; + } + for (const StorageInst& storage_inst : to_replace) { + const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; + const auto it{storage_buffers.find(storage_inst.storage_buffer)}; + const IR::U32 index{IR::Value{static_cast(storage_buffers.index_of(it))}}; + IR::Block* const block{storage_inst.block}; + IR::Inst* const inst{storage_inst.inst}; + const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; + Replace(*block, *inst, index, offset); } } diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 30eb31588..89e5811d3 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -8,6 +8,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/function.h" +#include "shader_recompiler/frontend/ir/program.h" namespace Shader::Optimization { @@ -18,9 +19,10 @@ void PostOrderInvoke(Func&& func, IR::Function& function) { } } +void CollectShaderInfoPass(IR::Program& program); void ConstantPropagationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block); -void GlobalMemoryToStorageBufferPass(IR::Block& block); +void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); void SsaRewritePass(std::span post_order_blocks); void VerificationPass(const IR::Function& function); diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 216345e91..1610bb34e 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -67,8 +67,8 @@ int main() { ObjectPool inst_pool; ObjectPool block_pool; - // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; - FileEnvironment env{"D:\\Shaders\\shader.bin"}; + FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; + // FileEnvironment env{"D:\\Shaders\\shader.bin"}; block_pool.ReleaseContents(); inst_pool.ReleaseContents(); flow_block_pool.ReleaseContents(); diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 1760bf4a9..f49a79368 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -6,23 +6,40 @@ #include +#include "common/common_types.h" + #include namespace Shader { struct Info { - struct ConstantBuffer { + static constexpr size_t MAX_CBUFS{18}; + static constexpr size_t MAX_SSBOS{16}; + + struct ConstantBufferDescriptor { + u32 index; + u32 count; + }; + struct StorageBufferDescriptor { + u32 cbuf_index; + u32 cbuf_offset; + u32 count; }; - struct { - bool workgroup_id{}; - bool local_invocation_id{}; - bool fp16{}; - bool fp64{}; - } uses; + bool uses_workgroup_id{}; + bool uses_local_invocation_id{}; + bool uses_fp16{}; + bool uses_fp64{}; + + u32 constant_buffer_mask{}; + + std::array constant_buffers{}; + boost::container::static_vector + constant_buffer_descriptors; - std::array<18 + std::array storage_buffers{}; + boost::container::static_vector storage_buffers_descriptors; }; } // namespace Shader -- cgit v1.2.3 From 85cce78583bc2232428a8fb39e43182877c8d5ad Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 17 Feb 2021 00:59:28 -0300 Subject: shader: Primitive Vulkan integration --- src/shader_recompiler/CMakeLists.txt | 13 +- .../backend/spirv/emit_context.cpp | 2 + src/shader_recompiler/backend/spirv/emit_spirv.cpp | 117 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 419 ++-- .../spirv/emit_spirv_bitwise_conversion.cpp | 24 +- .../backend/spirv/emit_spirv_composite.cpp | 48 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 42 +- .../backend/spirv/emit_spirv_control_flow.cpp | 10 +- .../backend/spirv/emit_spirv_floating_point.cpp | 92 +- .../backend/spirv/emit_spirv_integer.cpp | 60 +- .../backend/spirv/emit_spirv_logical.cpp | 40 +- .../backend/spirv/emit_spirv_memory.cpp | 56 +- .../backend/spirv/emit_spirv_select.cpp | 8 +- .../backend/spirv/emit_spirv_undefined.cpp | 10 +- src/shader_recompiler/environment.h | 6 +- src/shader_recompiler/file_environment.cpp | 6 +- src/shader_recompiler/file_environment.h | 4 +- src/shader_recompiler/frontend/ir/basic_block.cpp | 2 + src/shader_recompiler/frontend/ir/post_order.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 8 + .../frontend/maxwell/translate/impl/impl.h | 1 + .../maxwell/translate/impl/move_register.cpp | 35 +- .../maxwell/translate/impl/not_implemented.cpp | 4 - src/shader_recompiler/main.cpp | 2 +- src/shader_recompiler/profile.h | 13 + src/shader_recompiler/recompiler.cpp | 27 + src/shader_recompiler/recompiler.h | 18 + src/video_core/CMakeLists.txt | 6 +- src/video_core/engines/kepler_compute.h | 1 - src/video_core/engines/shader_bytecode.h | 2298 -------------------- src/video_core/engines/shader_header.h | 158 -- .../renderer_vulkan/vk_compute_pipeline.cpp | 140 +- .../renderer_vulkan/vk_compute_pipeline.h | 43 +- .../renderer_vulkan/vk_descriptor_pool.cpp | 6 +- .../renderer_vulkan/vk_descriptor_pool.h | 10 +- src/video_core/renderer_vulkan/vk_pipeline.h | 36 + .../renderer_vulkan/vk_pipeline_cache.cpp | 190 +- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 23 +- src/video_core/renderer_vulkan/vk_rasterizer.h | 3 - .../renderer_vulkan/vk_resource_pool.cpp | 12 +- src/video_core/renderer_vulkan/vk_resource_pool.h | 12 +- 43 files changed, 1003 insertions(+), 3036 deletions(-) create mode 100644 src/shader_recompiler/profile.h create mode 100644 src/shader_recompiler/recompiler.cpp create mode 100644 src/shader_recompiler/recompiler.h delete mode 100644 src/video_core/engines/shader_bytecode.h delete mode 100644 src/video_core/engines/shader_header.h create mode 100644 src/video_core/renderer_vulkan/vk_pipeline.h (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 84be94a8d..b56bdd3d9 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(shader_recompiler +add_library(shader_recompiler STATIC backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp @@ -85,13 +85,19 @@ add_executable(shader_recompiler ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp ir_opt/verification_pass.cpp - main.cpp object_pool.h + profile.h + recompiler.cpp + recompiler.h shader_info.h ) -target_include_directories(video_core PRIVATE sirit) +target_include_directories(shader_recompiler PRIVATE sirit) target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) +target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit) + +add_executable(shader_util main.cpp) +target_link_libraries(shader_util PRIVATE shader_recompiler) if (MSVC) target_compile_options(shader_recompiler PRIVATE @@ -121,3 +127,4 @@ else() endif() create_target_directory_groups(shader_recompiler) +create_target_directory_groups(shader_util) diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 1c985aff8..770067d98 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -115,6 +115,7 @@ void EmitContext::DefineConstantBuffers(const Info& info) { for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("c{}", desc.index)); std::fill_n(cbufs.data() + desc.index, desc.count, id); binding += desc.count; @@ -143,6 +144,7 @@ void EmitContext::DefineStorageBuffers(const Info& info) { for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("ssbo{}", binding)); std::fill_n(ssbos.data() + binding, desc.count, id); binding += desc.count; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 55018332e..d59718435 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -2,8 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include +#include #include +#include +#include #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/basic_block.h" @@ -14,10 +17,10 @@ namespace Shader::Backend::SPIRV { namespace { template -struct FuncTraits : FuncTraits {}; +struct FuncTraits : FuncTraits {}; -template -struct FuncTraits { +template +struct FuncTraits { using ReturnType = ReturnType_; static constexpr size_t NUM_ARGS = sizeof...(Args); @@ -26,15 +29,15 @@ struct FuncTraits { using ArgType = std::tuple_element_t>; }; -template -void SetDefinition(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, Args... args) { +template +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { const Id forward_id{inst->Definition()}; const bool has_forward_id{Sirit::ValidId(forward_id)}; Id current_id{}; if (has_forward_id) { current_id = ctx.ExchangeCurrentId(forward_id); } - const Id new_id{(emit.*method)(ctx, std::forward(args)...)}; + const Id new_id{func(ctx, std::forward(args)...)}; if (has_forward_id) { ctx.ExchangeCurrentId(current_id); } else { @@ -55,42 +58,62 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { } } -template -void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst, std::index_sequence) { - using Traits = FuncTraits; +template +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { + using Traits = FuncTraits; if constexpr (std::is_same_v) { if constexpr (is_first_arg_inst) { - SetDefinition(emit, ctx, inst, inst, - Arg>(ctx, inst->Arg(I))...); + SetDefinition(ctx, inst, inst, Arg>(ctx, inst->Arg(I))...); } else { - SetDefinition(emit, ctx, inst, - Arg>(ctx, inst->Arg(I))...); + SetDefinition(ctx, inst, Arg>(ctx, inst->Arg(I))...); } } else { if constexpr (is_first_arg_inst) { - (emit.*method)(ctx, inst, Arg>(ctx, inst->Arg(I))...); + func(ctx, inst, Arg>(ctx, inst->Arg(I))...); } else { - (emit.*method)(ctx, Arg>(ctx, inst->Arg(I))...); + func(ctx, Arg>(ctx, inst->Arg(I))...); } } } -template -void Invoke(EmitSPIRV& emit, EmitContext& ctx, IR::Inst* inst) { - using Traits = FuncTraits; +template +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits; static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); if constexpr (Traits::NUM_ARGS == 1) { - Invoke(emit, ctx, inst, std::make_index_sequence<0>{}); + Invoke(ctx, inst, std::make_index_sequence<0>{}); } else { using FirstArgType = typename Traits::template ArgType<1>; static constexpr bool is_first_arg_inst = std::is_same_v; using Indices = std::make_index_sequence; - Invoke(emit, ctx, inst, Indices{}); + Invoke(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->Opcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->Opcode()); +} + +Id TypeId(const EmitContext& ctx, IR::Type type) { + switch (type) { + case IR::Type::U1: + return ctx.U1; + case IR::Type::U32: + return ctx.U32[1]; + default: + throw NotImplementedException("Phi node type {}", type); } } } // Anonymous namespace -EmitSPIRV::EmitSPIRV(IR::Program& program) { +std::vector EmitSPIRV(Environment& env, IR::Program& program) { EmitContext ctx{program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) @@ -112,43 +135,17 @@ EmitSPIRV::EmitSPIRV(IR::Program& program) { if (program.info.uses_local_invocation_id) { interfaces.push_back(ctx.local_invocation_id); } - const std::span interfaces_span(interfaces.data(), interfaces.size()); - ctx.AddEntryPoint(spv::ExecutionModel::Fragment, func, "main", interfaces_span); - ctx.AddExecutionMode(func, spv::ExecutionMode::OriginUpperLeft); - - std::vector result{ctx.Assemble()}; - std::FILE* file{std::fopen("D:\\shader.spv", "wb")}; - std::fwrite(result.data(), sizeof(u32), result.size(), file); - std::fclose(file); - std::system("spirv-dis D:\\shader.spv") == 0 && - std::system("spirv-val --uniform-buffer-standard-layout D:\\shader.spv") == 0 && - std::system("spirv-cross -V D:\\shader.spv") == 0; -} + ctx.AddEntryPoint(spv::ExecutionModel::GLCompute, func, "main", interfaces_span); -void EmitSPIRV::EmitInst(EmitContext& ctx, IR::Inst* inst) { - switch (inst->Opcode()) { -#define OPCODE(name, result_type, ...) \ - case IR::Opcode::name: \ - return Invoke<&EmitSPIRV::Emit##name>(*this, ctx, inst); -#include "shader_recompiler/frontend/ir/opcodes.inc" -#undef OPCODE - } - throw LogicError("Invalid opcode {}", inst->Opcode()); -} + const std::array workgroup_size{env.WorkgroupSize()}; + ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], + workgroup_size[2]); -static Id TypeId(const EmitContext& ctx, IR::Type type) { - switch (type) { - case IR::Type::U1: - return ctx.U1; - case IR::Type::U32: - return ctx.U32[1]; - default: - throw NotImplementedException("Phi node type {}", type); - } + return ctx.Assemble(); } -Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { +Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { const size_t num_args{inst->NumArgs()}; boost::container::small_vector operands; operands.reserve(num_args * 2); @@ -178,25 +175,25 @@ Id EmitSPIRV::EmitPhi(EmitContext& ctx, IR::Inst* inst) { return ctx.OpPhi(result_type, std::span(operands.data(), operands.size())); } -void EmitSPIRV::EmitVoid(EmitContext&) {} +void EmitVoid(EmitContext&) {} -Id EmitSPIRV::EmitIdentity(EmitContext& ctx, const IR::Value& value) { +Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { return ctx.Def(value); } -void EmitSPIRV::EmitGetZeroFromOp(EmitContext&) { +void EmitGetZeroFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetSignFromOp(EmitContext&) { +void EmitGetSignFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetCarryFromOp(EmitContext&) { +void EmitGetCarryFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } -void EmitSPIRV::EmitGetOverflowFromOp(EmitContext&) { +void EmitGetOverflowFromOp(EmitContext&) { throw LogicError("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 8bde82613..5813f51ff 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -8,223 +8,218 @@ #include "common/common_types.h" #include "shader_recompiler/backend/spirv/emit_context.h" +#include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" namespace Shader::Backend::SPIRV { -class EmitSPIRV { -public: - explicit EmitSPIRV(IR::Program& program); +[[nodiscard]] std::vector EmitSPIRV(Environment& env, IR::Program& program); -private: - void EmitInst(EmitContext& ctx, IR::Inst* inst); - - // Microinstruction emitters - Id EmitPhi(EmitContext& ctx, IR::Inst* inst); - void EmitVoid(EmitContext& ctx); - Id EmitIdentity(EmitContext& ctx, const IR::Value& value); - void EmitBranch(EmitContext& ctx, IR::Block* label); - void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label); - void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); - void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); - void EmitReturn(EmitContext& ctx); - void EmitGetRegister(EmitContext& ctx); - void EmitSetRegister(EmitContext& ctx); - void EmitGetPred(EmitContext& ctx); - void EmitSetPred(EmitContext& ctx); - void EmitSetGotoVariable(EmitContext& ctx); - void EmitGetGotoVariable(EmitContext& ctx); - Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); - void EmitGetAttribute(EmitContext& ctx); - void EmitSetAttribute(EmitContext& ctx); - void EmitGetAttributeIndexed(EmitContext& ctx); - void EmitSetAttributeIndexed(EmitContext& ctx); - void EmitGetZFlag(EmitContext& ctx); - void EmitGetSFlag(EmitContext& ctx); - void EmitGetCFlag(EmitContext& ctx); - void EmitGetOFlag(EmitContext& ctx); - void EmitSetZFlag(EmitContext& ctx); - void EmitSetSFlag(EmitContext& ctx); - void EmitSetCFlag(EmitContext& ctx); - void EmitSetOFlag(EmitContext& ctx); - Id EmitWorkgroupId(EmitContext& ctx); - Id EmitLocalInvocationId(EmitContext& ctx); - Id EmitUndefU1(EmitContext& ctx); - Id EmitUndefU8(EmitContext& ctx); - Id EmitUndefU16(EmitContext& ctx); - Id EmitUndefU32(EmitContext& ctx); - Id EmitUndefU64(EmitContext& ctx); - void EmitLoadGlobalU8(EmitContext& ctx); - void EmitLoadGlobalS8(EmitContext& ctx); - void EmitLoadGlobalU16(EmitContext& ctx); - void EmitLoadGlobalS16(EmitContext& ctx); - void EmitLoadGlobal32(EmitContext& ctx); - void EmitLoadGlobal64(EmitContext& ctx); - void EmitLoadGlobal128(EmitContext& ctx); - void EmitWriteGlobalU8(EmitContext& ctx); - void EmitWriteGlobalS8(EmitContext& ctx); - void EmitWriteGlobalU16(EmitContext& ctx); - void EmitWriteGlobalS16(EmitContext& ctx); - void EmitWriteGlobal32(EmitContext& ctx); - void EmitWriteGlobal64(EmitContext& ctx); - void EmitWriteGlobal128(EmitContext& ctx); - void EmitLoadStorageU8(EmitContext& ctx); - void EmitLoadStorageS8(EmitContext& ctx); - void EmitLoadStorageU16(EmitContext& ctx); - void EmitLoadStorageS16(EmitContext& ctx); - Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); - void EmitLoadStorage64(EmitContext& ctx); - void EmitLoadStorage128(EmitContext& ctx); - void EmitWriteStorageU8(EmitContext& ctx); - void EmitWriteStorageS8(EmitContext& ctx); - void EmitWriteStorageU16(EmitContext& ctx); - void EmitWriteStorageS16(EmitContext& ctx); - void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); - void EmitWriteStorage64(EmitContext& ctx); - void EmitWriteStorage128(EmitContext& ctx); - void EmitCompositeConstructU32x2(EmitContext& ctx); - void EmitCompositeConstructU32x3(EmitContext& ctx); - void EmitCompositeConstructU32x4(EmitContext& ctx); - void EmitCompositeExtractU32x2(EmitContext& ctx); - Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); - void EmitCompositeExtractU32x4(EmitContext& ctx); - void EmitCompositeConstructF16x2(EmitContext& ctx); - void EmitCompositeConstructF16x3(EmitContext& ctx); - void EmitCompositeConstructF16x4(EmitContext& ctx); - void EmitCompositeExtractF16x2(EmitContext& ctx); - void EmitCompositeExtractF16x3(EmitContext& ctx); - void EmitCompositeExtractF16x4(EmitContext& ctx); - void EmitCompositeConstructF32x2(EmitContext& ctx); - void EmitCompositeConstructF32x3(EmitContext& ctx); - void EmitCompositeConstructF32x4(EmitContext& ctx); - void EmitCompositeExtractF32x2(EmitContext& ctx); - void EmitCompositeExtractF32x3(EmitContext& ctx); - void EmitCompositeExtractF32x4(EmitContext& ctx); - void EmitCompositeConstructF64x2(EmitContext& ctx); - void EmitCompositeConstructF64x3(EmitContext& ctx); - void EmitCompositeConstructF64x4(EmitContext& ctx); - void EmitCompositeExtractF64x2(EmitContext& ctx); - void EmitCompositeExtractF64x3(EmitContext& ctx); - void EmitCompositeExtractF64x4(EmitContext& ctx); - void EmitSelect8(EmitContext& ctx); - void EmitSelect16(EmitContext& ctx); - void EmitSelect32(EmitContext& ctx); - void EmitSelect64(EmitContext& ctx); - void EmitBitCastU16F16(EmitContext& ctx); - Id EmitBitCastU32F32(EmitContext& ctx, Id value); - void EmitBitCastU64F64(EmitContext& ctx); - void EmitBitCastF16U16(EmitContext& ctx); - Id EmitBitCastF32U32(EmitContext& ctx, Id value); - void EmitBitCastF64U64(EmitContext& ctx); - void EmitPackUint2x32(EmitContext& ctx); - void EmitUnpackUint2x32(EmitContext& ctx); - void EmitPackFloat2x16(EmitContext& ctx); - void EmitUnpackFloat2x16(EmitContext& ctx); - void EmitPackDouble2x32(EmitContext& ctx); - void EmitUnpackDouble2x32(EmitContext& ctx); - void EmitGetZeroFromOp(EmitContext& ctx); - void EmitGetSignFromOp(EmitContext& ctx); - void EmitGetCarryFromOp(EmitContext& ctx); - void EmitGetOverflowFromOp(EmitContext& ctx); - void EmitFPAbs16(EmitContext& ctx); - void EmitFPAbs32(EmitContext& ctx); - void EmitFPAbs64(EmitContext& ctx); - Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); - void EmitFPMax32(EmitContext& ctx); - void EmitFPMax64(EmitContext& ctx); - void EmitFPMin32(EmitContext& ctx); - void EmitFPMin64(EmitContext& ctx); - Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - void EmitFPNeg16(EmitContext& ctx); - void EmitFPNeg32(EmitContext& ctx); - void EmitFPNeg64(EmitContext& ctx); - void EmitFPRecip32(EmitContext& ctx); - void EmitFPRecip64(EmitContext& ctx); - void EmitFPRecipSqrt32(EmitContext& ctx); - void EmitFPRecipSqrt64(EmitContext& ctx); - void EmitFPSqrt(EmitContext& ctx); - void EmitFPSin(EmitContext& ctx); - void EmitFPSinNotReduced(EmitContext& ctx); - void EmitFPExp2(EmitContext& ctx); - void EmitFPExp2NotReduced(EmitContext& ctx); - void EmitFPCos(EmitContext& ctx); - void EmitFPCosNotReduced(EmitContext& ctx); - void EmitFPLog2(EmitContext& ctx); - void EmitFPSaturate16(EmitContext& ctx); - void EmitFPSaturate32(EmitContext& ctx); - void EmitFPSaturate64(EmitContext& ctx); - void EmitFPRoundEven16(EmitContext& ctx); - void EmitFPRoundEven32(EmitContext& ctx); - void EmitFPRoundEven64(EmitContext& ctx); - void EmitFPFloor16(EmitContext& ctx); - void EmitFPFloor32(EmitContext& ctx); - void EmitFPFloor64(EmitContext& ctx); - void EmitFPCeil16(EmitContext& ctx); - void EmitFPCeil32(EmitContext& ctx); - void EmitFPCeil64(EmitContext& ctx); - void EmitFPTrunc16(EmitContext& ctx); - void EmitFPTrunc32(EmitContext& ctx); - void EmitFPTrunc64(EmitContext& ctx); - Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); - void EmitIAdd64(EmitContext& ctx); - Id EmitISub32(EmitContext& ctx, Id a, Id b); - void EmitISub64(EmitContext& ctx); - Id EmitIMul32(EmitContext& ctx, Id a, Id b); - void EmitINeg32(EmitContext& ctx); - void EmitIAbs32(EmitContext& ctx); - Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); - void EmitShiftRightLogical32(EmitContext& ctx); - void EmitShiftRightArithmetic32(EmitContext& ctx); - void EmitBitwiseAnd32(EmitContext& ctx); - void EmitBitwiseOr32(EmitContext& ctx); - void EmitBitwiseXor32(EmitContext& ctx); - void EmitBitFieldInsert(EmitContext& ctx); - void EmitBitFieldSExtract(EmitContext& ctx); - Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); - Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); - void EmitULessThan(EmitContext& ctx); - void EmitIEqual(EmitContext& ctx); - void EmitSLessThanEqual(EmitContext& ctx); - void EmitULessThanEqual(EmitContext& ctx); - Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); - void EmitUGreaterThan(EmitContext& ctx); - void EmitINotEqual(EmitContext& ctx); - void EmitSGreaterThanEqual(EmitContext& ctx); - Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); - void EmitLogicalOr(EmitContext& ctx); - void EmitLogicalAnd(EmitContext& ctx); - void EmitLogicalXor(EmitContext& ctx); - void EmitLogicalNot(EmitContext& ctx); - void EmitConvertS16F16(EmitContext& ctx); - void EmitConvertS16F32(EmitContext& ctx); - void EmitConvertS16F64(EmitContext& ctx); - void EmitConvertS32F16(EmitContext& ctx); - void EmitConvertS32F32(EmitContext& ctx); - void EmitConvertS32F64(EmitContext& ctx); - void EmitConvertS64F16(EmitContext& ctx); - void EmitConvertS64F32(EmitContext& ctx); - void EmitConvertS64F64(EmitContext& ctx); - void EmitConvertU16F16(EmitContext& ctx); - void EmitConvertU16F32(EmitContext& ctx); - void EmitConvertU16F64(EmitContext& ctx); - void EmitConvertU32F16(EmitContext& ctx); - void EmitConvertU32F32(EmitContext& ctx); - void EmitConvertU32F64(EmitContext& ctx); - void EmitConvertU64F16(EmitContext& ctx); - void EmitConvertU64F32(EmitContext& ctx); - void EmitConvertU64F64(EmitContext& ctx); - void EmitConvertU64U32(EmitContext& ctx); - void EmitConvertU32U64(EmitContext& ctx); -}; +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitBranch(EmitContext& ctx, IR::Block* label); +void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, + IR::Block* false_label); +void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label); +void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label); +void EmitReturn(EmitContext& ctx); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx); +void EmitSetAttribute(EmitContext& ctx); +void EmitGetAttributeIndexed(EmitContext& ctx); +void EmitSetAttributeIndexed(EmitContext& ctx); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx); +void EmitLoadGlobal64(EmitContext& ctx); +void EmitLoadGlobal128(EmitContext& ctx); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx); +void EmitWriteGlobal64(EmitContext& ctx); +void EmitWriteGlobal128(EmitContext& ctx); +void EmitLoadStorageU8(EmitContext& ctx); +void EmitLoadStorageS8(EmitContext& ctx); +void EmitLoadStorageU16(EmitContext& ctx); +void EmitLoadStorageS16(EmitContext& ctx); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx); +void EmitLoadStorage128(EmitContext& ctx); +void EmitWriteStorageU8(EmitContext& ctx); +void EmitWriteStorageS8(EmitContext& ctx); +void EmitWriteStorageU16(EmitContext& ctx); +void EmitWriteStorageS16(EmitContext& ctx); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx); +void EmitWriteStorage128(EmitContext& ctx); +void EmitCompositeConstructU32x2(EmitContext& ctx); +void EmitCompositeConstructU32x3(EmitContext& ctx); +void EmitCompositeConstructU32x4(EmitContext& ctx); +void EmitCompositeExtractU32x2(EmitContext& ctx); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx); +void EmitCompositeConstructF16x2(EmitContext& ctx); +void EmitCompositeConstructF16x3(EmitContext& ctx); +void EmitCompositeConstructF16x4(EmitContext& ctx); +void EmitCompositeExtractF16x2(EmitContext& ctx); +void EmitCompositeExtractF16x3(EmitContext& ctx); +void EmitCompositeExtractF16x4(EmitContext& ctx); +void EmitCompositeConstructF32x2(EmitContext& ctx); +void EmitCompositeConstructF32x3(EmitContext& ctx); +void EmitCompositeConstructF32x4(EmitContext& ctx); +void EmitCompositeExtractF32x2(EmitContext& ctx); +void EmitCompositeExtractF32x3(EmitContext& ctx); +void EmitCompositeExtractF32x4(EmitContext& ctx); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitSelect8(EmitContext& ctx); +void EmitSelect16(EmitContext& ctx); +void EmitSelect32(EmitContext& ctx); +void EmitSelect64(EmitContext& ctx); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& ctx); +void EmitPackUint2x32(EmitContext& ctx); +void EmitUnpackUint2x32(EmitContext& ctx); +void EmitPackFloat2x16(EmitContext& ctx); +void EmitUnpackFloat2x16(EmitContext& ctx); +void EmitPackDouble2x32(EmitContext& ctx); +void EmitUnpackDouble2x32(EmitContext& ctx); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx); +void EmitFPAbs32(EmitContext& ctx); +void EmitFPAbs64(EmitContext& ctx); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +void EmitFPMax32(EmitContext& ctx); +void EmitFPMax64(EmitContext& ctx); +void EmitFPMin32(EmitContext& ctx); +void EmitFPMin64(EmitContext& ctx); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +void EmitFPNeg16(EmitContext& ctx); +void EmitFPNeg32(EmitContext& ctx); +void EmitFPNeg64(EmitContext& ctx); +void EmitFPRecip32(EmitContext& ctx); +void EmitFPRecip64(EmitContext& ctx); +void EmitFPRecipSqrt32(EmitContext& ctx); +void EmitFPRecipSqrt64(EmitContext& ctx); +void EmitFPSqrt(EmitContext& ctx); +void EmitFPSin(EmitContext& ctx); +void EmitFPSinNotReduced(EmitContext& ctx); +void EmitFPExp2(EmitContext& ctx); +void EmitFPExp2NotReduced(EmitContext& ctx); +void EmitFPCos(EmitContext& ctx); +void EmitFPCosNotReduced(EmitContext& ctx); +void EmitFPLog2(EmitContext& ctx); +void EmitFPSaturate16(EmitContext& ctx); +void EmitFPSaturate32(EmitContext& ctx); +void EmitFPSaturate64(EmitContext& ctx); +void EmitFPRoundEven16(EmitContext& ctx); +void EmitFPRoundEven32(EmitContext& ctx); +void EmitFPRoundEven64(EmitContext& ctx); +void EmitFPFloor16(EmitContext& ctx); +void EmitFPFloor32(EmitContext& ctx); +void EmitFPFloor64(EmitContext& ctx); +void EmitFPCeil16(EmitContext& ctx); +void EmitFPCeil32(EmitContext& ctx); +void EmitFPCeil64(EmitContext& ctx); +void EmitFPTrunc16(EmitContext& ctx); +void EmitFPTrunc32(EmitContext& ctx); +void EmitFPTrunc64(EmitContext& ctx); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +void EmitIAdd64(EmitContext& ctx); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +void EmitISub64(EmitContext& ctx); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +void EmitINeg32(EmitContext& ctx); +void EmitIAbs32(EmitContext& ctx); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +void EmitShiftRightLogical32(EmitContext& ctx); +void EmitShiftRightArithmetic32(EmitContext& ctx); +void EmitBitwiseAnd32(EmitContext& ctx); +void EmitBitwiseOr32(EmitContext& ctx); +void EmitBitwiseXor32(EmitContext& ctx); +void EmitBitFieldInsert(EmitContext& ctx); +void EmitBitFieldSExtract(EmitContext& ctx); +Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +void EmitULessThan(EmitContext& ctx); +void EmitIEqual(EmitContext& ctx); +void EmitSLessThanEqual(EmitContext& ctx); +void EmitULessThanEqual(EmitContext& ctx); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +void EmitUGreaterThan(EmitContext& ctx); +void EmitINotEqual(EmitContext& ctx); +void EmitSGreaterThanEqual(EmitContext& ctx); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +void EmitLogicalOr(EmitContext& ctx); +void EmitLogicalAnd(EmitContext& ctx); +void EmitLogicalXor(EmitContext& ctx); +void EmitLogicalNot(EmitContext& ctx); +void EmitConvertS16F16(EmitContext& ctx); +void EmitConvertS16F32(EmitContext& ctx); +void EmitConvertS16F64(EmitContext& ctx); +void EmitConvertS32F16(EmitContext& ctx); +void EmitConvertS32F32(EmitContext& ctx); +void EmitConvertS32F64(EmitContext& ctx); +void EmitConvertS64F16(EmitContext& ctx); +void EmitConvertS64F32(EmitContext& ctx); +void EmitConvertS64F64(EmitContext& ctx); +void EmitConvertU16F16(EmitContext& ctx); +void EmitConvertU16F32(EmitContext& ctx); +void EmitConvertU16F64(EmitContext& ctx); +void EmitConvertU32F16(EmitContext& ctx); +void EmitConvertU32F32(EmitContext& ctx); +void EmitConvertU32F64(EmitContext& ctx); +void EmitConvertU64F16(EmitContext& ctx); +void EmitConvertU64F32(EmitContext& ctx); +void EmitConvertU64F64(EmitContext& ctx); +void EmitConvertU64U32(EmitContext& ctx); +void EmitConvertU32U64(EmitContext& ctx); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index af82df99c..49c200498 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -6,51 +6,51 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitBitCastU16F16(EmitContext&) { +void EmitBitCastU16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitCastU32F32(EmitContext& ctx, Id value) { +Id EmitBitCastU32F32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.U32[1], value); } -void EmitSPIRV::EmitBitCastU64F64(EmitContext&) { +void EmitBitCastU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitCastF16U16(EmitContext&) { +void EmitBitCastF16U16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitCastF32U32(EmitContext& ctx, Id value) { +Id EmitBitCastF32U32(EmitContext& ctx, Id value) { return ctx.OpBitcast(ctx.F32[1], value); } -void EmitSPIRV::EmitBitCastF64U64(EmitContext&) { +void EmitBitCastF64U64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackUint2x32(EmitContext&) { +void EmitPackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackUint2x32(EmitContext&) { +void EmitUnpackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackFloat2x16(EmitContext&) { +void EmitPackFloat2x16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackFloat2x16(EmitContext&) { +void EmitUnpackFloat2x16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitPackDouble2x32(EmitContext&) { +void EmitPackDouble2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitUnpackDouble2x32(EmitContext&) { +void EmitUnpackDouble2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index a7374c89d..348e4796d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,99 +6,99 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitCompositeConstructU32x2(EmitContext&) { +void EmitCompositeConstructU32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructU32x3(EmitContext&) { +void EmitCompositeConstructU32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructU32x4(EmitContext&) { +void EmitCompositeConstructU32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractU32x2(EmitContext&) { +void EmitCompositeExtractU32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { return ctx.OpCompositeExtract(ctx.U32[1], vector, index); } -void EmitSPIRV::EmitCompositeExtractU32x4(EmitContext&) { +void EmitCompositeExtractU32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x2(EmitContext&) { +void EmitCompositeConstructF16x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x3(EmitContext&) { +void EmitCompositeConstructF16x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF16x4(EmitContext&) { +void EmitCompositeConstructF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x2(EmitContext&) { +void EmitCompositeExtractF16x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x3(EmitContext&) { +void EmitCompositeExtractF16x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF16x4(EmitContext&) { +void EmitCompositeExtractF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x2(EmitContext&) { +void EmitCompositeConstructF32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x3(EmitContext&) { +void EmitCompositeConstructF32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF32x4(EmitContext&) { +void EmitCompositeConstructF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x2(EmitContext&) { +void EmitCompositeExtractF32x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x3(EmitContext&) { +void EmitCompositeExtractF32x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF32x4(EmitContext&) { +void EmitCompositeExtractF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x2(EmitContext&) { +void EmitCompositeConstructF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x3(EmitContext&) { +void EmitCompositeConstructF64x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeConstructF64x4(EmitContext&) { +void EmitCompositeConstructF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x2(EmitContext&) { +void EmitCompositeExtractF64x2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x3(EmitContext&) { +void EmitCompositeExtractF64x3(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitCompositeExtractF64x4(EmitContext&) { +void EmitCompositeExtractF64x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index f4c9970eb..eb9c01c5a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,31 +6,31 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitGetRegister(EmitContext&) { +void EmitGetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetRegister(EmitContext&) { +void EmitSetRegister(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetPred(EmitContext&) { +void EmitGetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetPred(EmitContext&) { +void EmitSetPred(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetGotoVariable(EmitContext&) { +void EmitSetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetGotoVariable(EmitContext&) { +void EmitGetGotoVariable(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { +Id EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Constant buffer indexing"); } @@ -43,59 +43,59 @@ Id EmitSPIRV::EmitGetCbuf(EmitContext& ctx, const IR::Value& binding, const IR:: return ctx.OpLoad(ctx.U32[1], access_chain); } -void EmitSPIRV::EmitGetAttribute(EmitContext&) { +void EmitGetAttribute(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetAttribute(EmitContext&) { +void EmitSetAttribute(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetAttributeIndexed(EmitContext&) { +void EmitGetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetAttributeIndexed(EmitContext&) { +void EmitSetAttributeIndexed(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetZFlag(EmitContext&) { +void EmitGetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetSFlag(EmitContext&) { +void EmitGetSFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetCFlag(EmitContext&) { +void EmitGetCFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitGetOFlag(EmitContext&) { +void EmitGetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetZFlag(EmitContext&) { +void EmitSetZFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetSFlag(EmitContext&) { +void EmitSetSFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetCFlag(EmitContext&) { +void EmitSetCFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSetOFlag(EmitContext&) { +void EmitSetOFlag(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitWorkgroupId(EmitContext& ctx) { +Id EmitWorkgroupId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); } -Id EmitSPIRV::EmitLocalInvocationId(EmitContext& ctx) { +Id EmitLocalInvocationId(EmitContext& ctx) { return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 549c1907a..6c4199664 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -6,25 +6,25 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitBranch(EmitContext& ctx, IR::Block* label) { +void EmitBranch(EmitContext& ctx, IR::Block* label) { ctx.OpBranch(label->Definition()); } -void EmitSPIRV::EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, +void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, IR::Block* false_label) { ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); } -void EmitSPIRV::EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { +void EmitLoopMerge(EmitContext& ctx, IR::Block* merge_label, IR::Block* continue_label) { ctx.OpLoopMerge(merge_label->Definition(), continue_label->Definition(), spv::LoopControlMask::MaskNone); } -void EmitSPIRV::EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { +void EmitSelectionMerge(EmitContext& ctx, IR::Block* merge_label) { ctx.OpSelectionMerge(merge_label->Definition(), spv::SelectionControlMask::MaskNone); } -void EmitSPIRV::EmitReturn(EmitContext& ctx) { +void EmitReturn(EmitContext& ctx) { ctx.OpReturn(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index c9bc121f8..d24fbb353 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -33,187 +33,187 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { } // Anonymous namespace -void EmitSPIRV::EmitFPAbs16(EmitContext&) { +void EmitFPAbs16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPAbs32(EmitContext&) { +void EmitFPAbs32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPAbs64(EmitContext&) { +void EmitFPAbs64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); } -Id EmitSPIRV::EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); } -Id EmitSPIRV::EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); } -Id EmitSPIRV::EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); } -Id EmitSPIRV::EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); } -Id EmitSPIRV::EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); } -void EmitSPIRV::EmitFPMax32(EmitContext&) { +void EmitFPMax32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMax64(EmitContext&) { +void EmitFPMax64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMin32(EmitContext&) { +void EmitFPMin32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPMin64(EmitContext&) { +void EmitFPMin64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); } -Id EmitSPIRV::EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); } -Id EmitSPIRV::EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } -void EmitSPIRV::EmitFPNeg16(EmitContext&) { +void EmitFPNeg16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPNeg32(EmitContext&) { +void EmitFPNeg32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPNeg64(EmitContext&) { +void EmitFPNeg64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecip32(EmitContext&) { +void EmitFPRecip32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecip64(EmitContext&) { +void EmitFPRecip64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecipSqrt32(EmitContext&) { +void EmitFPRecipSqrt32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRecipSqrt64(EmitContext&) { +void EmitFPRecipSqrt64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSqrt(EmitContext&) { +void EmitFPSqrt(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSin(EmitContext&) { +void EmitFPSin(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSinNotReduced(EmitContext&) { +void EmitFPSinNotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPExp2(EmitContext&) { +void EmitFPExp2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPExp2NotReduced(EmitContext&) { +void EmitFPExp2NotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCos(EmitContext&) { +void EmitFPCos(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCosNotReduced(EmitContext&) { +void EmitFPCosNotReduced(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPLog2(EmitContext&) { +void EmitFPLog2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate16(EmitContext&) { +void EmitFPSaturate16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate32(EmitContext&) { +void EmitFPSaturate32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPSaturate64(EmitContext&) { +void EmitFPSaturate64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven16(EmitContext&) { +void EmitFPRoundEven16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven32(EmitContext&) { +void EmitFPRoundEven32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPRoundEven64(EmitContext&) { +void EmitFPRoundEven64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor16(EmitContext&) { +void EmitFPFloor16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor32(EmitContext&) { +void EmitFPFloor32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPFloor64(EmitContext&) { +void EmitFPFloor64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil16(EmitContext&) { +void EmitFPCeil16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil32(EmitContext&) { +void EmitFPCeil32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPCeil64(EmitContext&) { +void EmitFPCeil64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc16(EmitContext&) { +void EmitFPTrunc16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc32(EmitContext&) { +void EmitFPTrunc32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitFPTrunc64(EmitContext&) { +void EmitFPTrunc64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 32af94a73..a1d16b81e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -6,126 +6,126 @@ namespace Shader::Backend::SPIRV { -Id EmitSPIRV::EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { if (inst->HasAssociatedPseudoOperation()) { throw NotImplementedException("Pseudo-operations on IAdd32"); } return ctx.OpIAdd(ctx.U32[1], a, b); } -void EmitSPIRV::EmitIAdd64(EmitContext&) { +void EmitIAdd64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitISub32(EmitContext& ctx, Id a, Id b) { +Id EmitISub32(EmitContext& ctx, Id a, Id b) { return ctx.OpISub(ctx.U32[1], a, b); } -void EmitSPIRV::EmitISub64(EmitContext&) { +void EmitISub64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitIMul32(EmitContext& ctx, Id a, Id b) { +Id EmitIMul32(EmitContext& ctx, Id a, Id b) { return ctx.OpIMul(ctx.U32[1], a, b); } -void EmitSPIRV::EmitINeg32(EmitContext&) { +void EmitINeg32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitIAbs32(EmitContext&) { +void EmitIAbs32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); } -void EmitSPIRV::EmitShiftRightLogical32(EmitContext&) { +void EmitShiftRightLogical32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitShiftRightArithmetic32(EmitContext&) { +void EmitShiftRightArithmetic32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseAnd32(EmitContext&) { +void EmitBitwiseAnd32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseOr32(EmitContext&) { +void EmitBitwiseOr32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitwiseXor32(EmitContext&) { +void EmitBitwiseXor32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitFieldInsert(EmitContext&) { +void EmitBitFieldInsert(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitBitFieldSExtract(EmitContext&) { +void EmitBitFieldSExtract(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { +Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count) { return ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count); } -Id EmitSPIRV::EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitULessThan(EmitContext&) { +void EmitULessThan(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitIEqual(EmitContext&) { +void EmitIEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSLessThanEqual(EmitContext&) { +void EmitSLessThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitULessThanEqual(EmitContext&) { +void EmitULessThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitUGreaterThan(EmitContext&) { +void EmitUGreaterThan(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitINotEqual(EmitContext&) { +void EmitINotEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSGreaterThanEqual(EmitContext&) { +void EmitSGreaterThanEqual(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); } -void EmitSPIRV::EmitLogicalOr(EmitContext&) { +void EmitLogicalOr(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalAnd(EmitContext&) { +void EmitLogicalAnd(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalXor(EmitContext&) { +void EmitLogicalXor(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLogicalNot(EmitContext&) { +void EmitLogicalNot(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index 7b43c4ed8..ff2f4fb74 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -6,83 +6,83 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitConvertS16F16(EmitContext&) { +void EmitConvertS16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS16F32(EmitContext&) { +void EmitConvertS16F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS16F64(EmitContext&) { +void EmitConvertS16F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F16(EmitContext&) { +void EmitConvertS32F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F32(EmitContext&) { +void EmitConvertS32F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS32F64(EmitContext&) { +void EmitConvertS32F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F16(EmitContext&) { +void EmitConvertS64F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F32(EmitContext&) { +void EmitConvertS64F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertS64F64(EmitContext&) { +void EmitConvertS64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F16(EmitContext&) { +void EmitConvertU16F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F32(EmitContext&) { +void EmitConvertU16F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU16F64(EmitContext&) { +void EmitConvertU16F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F16(EmitContext&) { +void EmitConvertU32F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F32(EmitContext&) { +void EmitConvertU32F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32F64(EmitContext&) { +void EmitConvertU32F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F16(EmitContext&) { +void EmitConvertU64F16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F32(EmitContext&) { +void EmitConvertU64F32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64F64(EmitContext&) { +void EmitConvertU64F64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU64U32(EmitContext&) { +void EmitConvertU64U32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitConvertU32U64(EmitContext&) { +void EmitConvertU32U64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 5769a3c95..77d698ffd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -22,79 +22,79 @@ static Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } -void EmitSPIRV::EmitLoadGlobalU8(EmitContext&) { +void EmitLoadGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalS8(EmitContext&) { +void EmitLoadGlobalS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalU16(EmitContext&) { +void EmitLoadGlobalU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobalS16(EmitContext&) { +void EmitLoadGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal32(EmitContext&) { +void EmitLoadGlobal32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal64(EmitContext&) { +void EmitLoadGlobal64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadGlobal128(EmitContext&) { +void EmitLoadGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalU8(EmitContext&) { +void EmitWriteGlobalU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalS8(EmitContext&) { +void EmitWriteGlobalS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalU16(EmitContext&) { +void EmitWriteGlobalU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobalS16(EmitContext&) { +void EmitWriteGlobalS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal32(EmitContext&) { +void EmitWriteGlobal32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal64(EmitContext&) { +void EmitWriteGlobal64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteGlobal128(EmitContext&) { +void EmitWriteGlobal128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageU8(EmitContext&) { +void EmitLoadStorageU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageS8(EmitContext&) { +void EmitLoadStorageS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageU16(EmitContext&) { +void EmitLoadStorageU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorageS16(EmitContext&) { +void EmitLoadStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); @@ -105,31 +105,31 @@ Id EmitSPIRV::EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, return ctx.OpLoad(ctx.U32[1], pointer); } -void EmitSPIRV::EmitLoadStorage64(EmitContext&) { +void EmitLoadStorage64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitLoadStorage128(EmitContext&) { +void EmitLoadStorage128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageU8(EmitContext&) { +void EmitWriteStorageU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageS8(EmitContext&) { +void EmitWriteStorageS8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageU16(EmitContext&) { +void EmitWriteStorageU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorageS16(EmitContext&) { +void EmitWriteStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); @@ -140,11 +140,11 @@ void EmitSPIRV::EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ctx.OpStore(pointer, value); } -void EmitSPIRV::EmitWriteStorage64(EmitContext&) { +void EmitWriteStorage64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitWriteStorage128(EmitContext&) { +void EmitWriteStorage128(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 40a856f72..8d5062724 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -6,19 +6,19 @@ namespace Shader::Backend::SPIRV { -void EmitSPIRV::EmitSelect8(EmitContext&) { +void EmitSelect8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect16(EmitContext&) { +void EmitSelect16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect32(EmitContext&) { +void EmitSelect32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSPIRV::EmitSelect64(EmitContext&) { +void EmitSelect64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index c1ed8f281..19b06dbe4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -6,23 +6,23 @@ namespace Shader::Backend::SPIRV { -Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { +Id EmitUndefU1(EmitContext& ctx) { return ctx.OpUndef(ctx.U1); } -Id EmitSPIRV::EmitUndefU8(EmitContext&) { +Id EmitUndefU8(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUndefU16(EmitContext&) { +Id EmitUndefU16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitSPIRV::EmitUndefU32(EmitContext& ctx) { +Id EmitUndefU32(EmitContext& ctx) { return ctx.OpUndef(ctx.U32[1]); } -Id EmitSPIRV::EmitUndefU64(EmitContext&) { +Id EmitUndefU64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index f6230e817..0ba681fb9 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -1,5 +1,7 @@ #pragma once +#include + #include "common/common_types.h" namespace Shader { @@ -8,7 +10,9 @@ class Environment { public: virtual ~Environment() = default; - [[nodiscard]] virtual u64 ReadInstruction(u32 address) const = 0; + [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; + + [[nodiscard]] virtual std::array WorkgroupSize() = 0; }; } // namespace Shader diff --git a/src/shader_recompiler/file_environment.cpp b/src/shader_recompiler/file_environment.cpp index b34bf462b..5127523f9 100644 --- a/src/shader_recompiler/file_environment.cpp +++ b/src/shader_recompiler/file_environment.cpp @@ -29,7 +29,7 @@ FileEnvironment::FileEnvironment(const char* path) { FileEnvironment::~FileEnvironment() = default; -u64 FileEnvironment::ReadInstruction(u32 offset) const { +u64 FileEnvironment::ReadInstruction(u32 offset) { if (offset % 8 != 0) { throw InvalidArgument("offset={} is not aligned to 8", offset); } @@ -39,4 +39,8 @@ u64 FileEnvironment::ReadInstruction(u32 offset) const { return data[offset / 8]; } +std::array FileEnvironment::WorkgroupSize() { + return {1, 1, 1}; +} + } // namespace Shader diff --git a/src/shader_recompiler/file_environment.h b/src/shader_recompiler/file_environment.h index c294bc6fa..b8c4bbadd 100644 --- a/src/shader_recompiler/file_environment.h +++ b/src/shader_recompiler/file_environment.h @@ -12,7 +12,9 @@ public: explicit FileEnvironment(const char* path); ~FileEnvironment() override; - u64 ReadInstruction(u32 offset) const override; + u64 ReadInstruction(u32 offset) override; + + std::array WorkgroupSize() override; private: std::vector data; diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp index 5ae91dd7d..ec029dfd6 100644 --- a/src/shader_recompiler/frontend/ir/basic_block.cpp +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp @@ -127,6 +127,8 @@ static std::string ArgToIndex(const std::map& block_to_ind return fmt::format("#{}", arg.U32()); case Type::U64: return fmt::format("#{}", arg.U64()); + case Type::F32: + return fmt::format("#{}", arg.F32()); case Type::Reg: return fmt::format("{}", arg.Reg()); case Type::Pred: diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp index a48b8dec5..8709a2ea1 100644 --- a/src/shader_recompiler/frontend/ir/post_order.cpp +++ b/src/shader_recompiler/frontend/ir/post_order.cpp @@ -28,7 +28,7 @@ BlockList PostOrder(const BlockList& blocks) { if (!visited.insert(branch).second) { return false; } - // Calling push_back twice is faster than insert on msvc + // Calling push_back twice is faster than insert on MSVC block_stack.push_back(block); block_stack.push_back(branch); return true; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8331d576c..8c44ebb29 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -69,7 +69,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool(value)); } +IR::U32 TranslatorVisitor::GetReg8(u64 insn) { + union { + u64 raw; + BitField<8, 8, IR::Reg> index; + } const reg{insn}; + return X(reg.index); +} + IR::U32 TranslatorVisitor::GetReg20(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index b701605d7..8bd468244 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -301,6 +301,7 @@ public: void X(IR::Reg dest_reg, const IR::U32& value); void F(IR::Reg dest_reg, const IR::F32& value); + [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); [[nodiscard]] IR::F32 GetReg20F(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 1f83d1068..c3c4b9abd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp @@ -10,36 +10,35 @@ namespace Shader::Maxwell { namespace { -union MOV { - u64 raw; - BitField<0, 8, IR::Reg> dest_reg; - BitField<20, 8, IR::Reg> src_reg; - BitField<39, 4, u64> mask; -}; - -void CheckMask(MOV mov) { - if (mov.mask != 0xf) { +void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 4, u64> mask; + BitField<12, 4, u64> mov32i_mask; + } const mov{insn}; + + if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { throw NotImplementedException("Non-full move mask"); } + v.X(mov.dest_reg, src); } } // Anonymous namespace void TranslatorVisitor::MOV_reg(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, X(mov.src_reg)); + MOV(*this, insn, GetReg8(insn)); } void TranslatorVisitor::MOV_cbuf(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetCbuf(insn)); + MOV(*this, insn, GetCbuf(insn)); } void TranslatorVisitor::MOV_imm(u64 insn) { - const MOV mov{insn}; - CheckMask(mov); - X(mov.dest_reg, GetImm20(insn)); + MOV(*this, insn, GetImm20(insn)); +} + +void TranslatorVisitor::MOV32I(u64 insn) { + MOV(*this, insn, GetImm32(insn), true); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 1bb160acb..6b2a1356b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -617,10 +617,6 @@ void TranslatorVisitor::MEMBAR(u64) { ThrowNotImplemented(Opcode::MEMBAR); } -void TranslatorVisitor::MOV32I(u64) { - ThrowNotImplemented(Opcode::MOV32I); -} - void TranslatorVisitor::NOP(u64) { ThrowNotImplemented(Opcode::NOP); } diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 1610bb34e..050a37f18 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -76,5 +76,5 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - Backend::SPIRV::EmitSPIRV spirv{program}; + void(Backend::SPIRV::EmitSPIRV(env, program)); } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h new file mode 100644 index 000000000..c96d783b7 --- /dev/null +++ b/src/shader_recompiler/profile.h @@ -0,0 +1,13 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Shader { + +struct Profile { + bool unified_descriptor_binding; +}; + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp new file mode 100644 index 000000000..b25081e39 --- /dev/null +++ b/src/shader_recompiler/recompiler.cpp @@ -0,0 +1,27 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/common_types.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/object_pool.h" +#include "shader_recompiler/recompiler.h" + +namespace Shader { + +std::pair> RecompileSPIRV(Environment& env, u32 start_address) { + ObjectPool flow_block_pool; + ObjectPool inst_pool; + ObjectPool block_pool; + + Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; + IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; + return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; +} + +} // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h new file mode 100644 index 000000000..4cb973878 --- /dev/null +++ b/src/shader_recompiler/recompiler.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/shader_info.h" + +namespace Shader { + +[[nodiscard]] std::pair> RecompileSPIRV(Environment& env, u32 start_address); + +} // namespace Shader diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c5ce71706..3323e6916 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -43,9 +43,6 @@ add_library(video_core STATIC engines/maxwell_3d.h engines/maxwell_dma.cpp engines/maxwell_dma.h - engines/shader_bytecode.h - engines/shader_header.h - engines/shader_type.h framebuffer_config.h macro/macro.cpp macro/macro.h @@ -123,6 +120,7 @@ add_library(video_core STATIC renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_pipeline.h renderer_vulkan/vk_query_cache.cpp renderer_vulkan/vk_query_cache.h renderer_vulkan/vk_rasterizer.cpp @@ -201,7 +199,7 @@ add_library(video_core STATIC create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad xbyak) +target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak) if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) add_dependencies(video_core ffmpeg-build) diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 0d7683c2d..f8b8d06ac 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -12,7 +12,6 @@ #include "common/common_types.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h deleted file mode 100644 index 8b45f1b62..000000000 --- a/src/video_core/engines/shader_bytecode.h +++ /dev/null @@ -1,2298 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -struct Register { - /// Number of registers - static constexpr std::size_t NumRegisters = 256; - - /// Register 255 is special cased to always be 0 - static constexpr std::size_t ZeroIndex = 255; - - enum class Size : u64 { - Byte = 0, - Short = 1, - Word = 2, - Long = 3, - }; - - constexpr Register() = default; - - constexpr Register(u64 value_) : value(value_) {} - - [[nodiscard]] constexpr operator u64() const { - return value; - } - - template - [[nodiscard]] constexpr u64 operator-(const T& oth) const { - return value - oth; - } - - template - [[nodiscard]] constexpr u64 operator&(const T& oth) const { - return value & oth; - } - - [[nodiscard]] constexpr u64 operator&(const Register& oth) const { - return value & oth.value; - } - - [[nodiscard]] constexpr u64 operator~() const { - return ~value; - } - - [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { - elem = (value + elem) & 3; - return (value & ~3) + elem; - } - -private: - u64 value{}; -}; - -enum class AttributeSize : u64 { - Word = 0, - DoubleWord = 1, - TripleWord = 2, - QuadWord = 3, -}; - -union Attribute { - Attribute() = default; - - constexpr explicit Attribute(u64 value_) : value(value_) {} - - enum class Index : u64 { - LayerViewportPointSize = 6, - Position = 7, - Attribute_0 = 8, - Attribute_31 = 39, - FrontColor = 40, - FrontSecondaryColor = 41, - BackColor = 42, - BackSecondaryColor = 43, - ClipDistances0123 = 44, - ClipDistances4567 = 45, - PointCoord = 46, - // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex - // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval - // shader. - TessCoordInstanceIDVertexID = 47, - TexCoord_0 = 48, - TexCoord_7 = 55, - // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment - // shader. It is unknown what the other values contain. - FrontFacing = 63, - }; - - union { - BitField<20, 10, u64> immediate; - BitField<22, 2, u64> element; - BitField<24, 6, Index> index; - BitField<31, 1, u64> patch; - BitField<47, 3, AttributeSize> size; - - [[nodiscard]] bool IsPhysical() const { - return patch == 0 && element == 0 && static_cast(index.Value()) == 0; - } - } fmt20; - - union { - BitField<30, 2, u64> element; - BitField<32, 6, Index> index; - } fmt28; - - BitField<39, 8, u64> reg; - u64 value{}; -}; - -union Sampler { - Sampler() = default; - - constexpr explicit Sampler(u64 value_) : value(value_) {} - - enum class Index : u64 { - Sampler_0 = 8, - }; - - BitField<36, 13, Index> index; - u64 value{}; -}; - -union Image { - Image() = default; - - constexpr explicit Image(u64 value_) : value{value_} {} - - BitField<36, 13, u64> index; - u64 value; -}; - -} // namespace Tegra::Shader - -namespace std { - -// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330. -template <> -struct make_unsigned { - using type = Tegra::Shader::Attribute; -}; - -template <> -struct make_unsigned { - using type = Tegra::Shader::Register; -}; - -} // namespace std - -namespace Tegra::Shader { - -enum class Pred : u64 { - UnusedIndex = 0x7, - NeverExecute = 0xF, -}; - -enum class PredCondition : u64 { - F = 0, // Always false - LT = 1, // Ordered less than - EQ = 2, // Ordered equal - LE = 3, // Ordered less than or equal - GT = 4, // Ordered greater than - NE = 5, // Ordered not equal - GE = 6, // Ordered greater than or equal - NUM = 7, // Ordered - NAN_ = 8, // Unordered - LTU = 9, // Unordered less than - EQU = 10, // Unordered equal - LEU = 11, // Unordered less than or equal - GTU = 12, // Unordered greater than - NEU = 13, // Unordered not equal - GEU = 14, // Unordered greater than or equal - T = 15, // Always true -}; - -enum class PredOperation : u64 { - And = 0, - Or = 1, - Xor = 2, -}; - -enum class LogicOperation : u64 { - And = 0, - Or = 1, - Xor = 2, - PassB = 3, -}; - -enum class SubOp : u64 { - Cos = 0x0, - Sin = 0x1, - Ex2 = 0x2, - Lg2 = 0x3, - Rcp = 0x4, - Rsq = 0x5, - Sqrt = 0x8, -}; - -enum class F2iRoundingOp : u64 { - RoundEven = 0, - Floor = 1, - Ceil = 2, - Trunc = 3, -}; - -enum class F2fRoundingOp : u64 { - None = 0, - Pass = 3, - Round = 8, - Floor = 9, - Ceil = 10, - Trunc = 11, -}; - -enum class AtomicOp : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, - SafeAdd = 10, -}; - -enum class GlobalAtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32_FTZ_RN = 3, - F16x2_FTZ_RN = 4, - S64 = 5, -}; - -enum class UniformType : u64 { - UnsignedByte = 0, - SignedByte = 1, - UnsignedShort = 2, - SignedShort = 3, - Single = 4, - Double = 5, - Quad = 6, - UnsignedQuad = 7, -}; - -enum class StoreType : u64 { - Unsigned8 = 0, - Signed8 = 1, - Unsigned16 = 2, - Signed16 = 3, - Bits32 = 4, - Bits64 = 5, - Bits128 = 6, -}; - -enum class AtomicType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - S64 = 3, -}; - -enum class IMinMaxExchange : u64 { - None = 0, - XLo = 1, - XMed = 2, - XHi = 3, -}; - -enum class VideoType : u64 { - Size16_Low = 0, - Size16_High = 1, - Size32 = 2, - Invalid = 3, -}; - -enum class VmadShr : u64 { - Shr7 = 1, - Shr15 = 2, -}; - -enum class VmnmxType : u64 { - Bits8, - Bits16, - Bits32, -}; - -enum class VmnmxOperation : u64 { - Mrg_16H = 0, - Mrg_16L = 1, - Mrg_8B0 = 2, - Mrg_8B2 = 3, - Acc = 4, - Min = 5, - Max = 6, - Nop = 7, -}; - -enum class XmadMode : u64 { - None = 0, - CLo = 1, - CHi = 2, - CSfu = 3, - CBcc = 4, -}; - -enum class IAdd3Mode : u64 { - None = 0, - RightShift = 1, - LeftShift = 2, -}; - -enum class IAdd3Height : u64 { - None = 0, - LowerHalfWord = 1, - UpperHalfWord = 2, -}; - -enum class FlowCondition : u64 { - Always = 0xF, - Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? -}; - -enum class ConditionCode : u64 { - F = 0, - LT = 1, - EQ = 2, - LE = 3, - GT = 4, - NE = 5, - GE = 6, - Num = 7, - Nan = 8, - LTU = 9, - EQU = 10, - LEU = 11, - GTU = 12, - NEU = 13, - GEU = 14, - T = 15, - OFF = 16, - LO = 17, - SFF = 18, - LS = 19, - HI = 20, - SFT = 21, - HS = 22, - OFT = 23, - CSM_TA = 24, - CSM_TR = 25, - CSM_MX = 26, - FCSM_TA = 27, - FCSM_TR = 28, - FCSM_MX = 29, - RLE = 30, - RGT = 31, -}; - -enum class PredicateResultMode : u64 { - None = 0x0, - NotZero = 0x3, -}; - -enum class TextureType : u64 { - Texture1D = 0, - Texture2D = 1, - Texture3D = 2, - TextureCube = 3, -}; - -enum class TextureQueryType : u64 { - Dimension = 1, - TextureType = 2, - SamplePosition = 5, - Filter = 16, - LevelOfDetail = 18, - Wrap = 20, - BorderColor = 22, -}; - -enum class TextureProcessMode : u64 { - None = 0, - LZ = 1, // Load LOD of zero. - LB = 2, // Load Bias. - LL = 3, // Load LOD. - LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB. - LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL. -}; - -enum class TextureMiscMode : u64 { - DC, - AOFFI, // Uses Offset - NDV, - NODEP, - MZ, - PTP, -}; - -enum class SurfaceDataMode : u64 { - P = 0, - D_BA = 1, -}; - -enum class OutOfBoundsStore : u64 { - Ignore = 0, - Clamp = 1, - Trap = 2, -}; - -enum class ImageType : u64 { - Texture1D = 0, - TextureBuffer = 1, - Texture1DArray = 2, - Texture2D = 3, - Texture2DArray = 4, - Texture3D = 5, -}; - -enum class IsberdMode : u64 { - None = 0, - Patch = 1, - Prim = 2, - Attr = 3, -}; - -enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; - -enum class MembarType : u64 { - CTA = 0, - GL = 1, - SYS = 2, - VC = 3, -}; - -enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 }; - -enum class HalfType : u64 { - H0_H1 = 0, - F32 = 1, - H0_H0 = 2, - H1_H1 = 3, -}; - -enum class HalfMerge : u64 { - H0_H1 = 0, - F32 = 1, - Mrg_H0 = 2, - Mrg_H1 = 3, -}; - -enum class HalfPrecision : u64 { - None = 0, - FTZ = 1, - FMZ = 2, -}; - -enum class R2pMode : u64 { - Pr = 0, - Cc = 1, -}; - -enum class IpaInterpMode : u64 { - Pass = 0, - Multiply = 1, - Constant = 2, - Sc = 3, -}; - -enum class IpaSampleMode : u64 { - Default = 0, - Centroid = 1, - Offset = 2, -}; - -enum class LmemLoadCacheManagement : u64 { - Default = 0, - LU = 1, - CI = 2, - CV = 3, -}; - -enum class StoreCacheManagement : u64 { - Default = 0, - CG = 1, - CS = 2, - WT = 3, -}; - -struct IpaMode { - IpaInterpMode interpolation_mode; - IpaSampleMode sampling_mode; - - [[nodiscard]] bool operator==(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) == - std::tie(a.interpolation_mode, a.sampling_mode); - } - [[nodiscard]] bool operator!=(const IpaMode& a) const { - return !operator==(a); - } - [[nodiscard]] bool operator<(const IpaMode& a) const { - return std::tie(interpolation_mode, sampling_mode) < - std::tie(a.interpolation_mode, a.sampling_mode); - } -}; - -enum class SystemVariable : u64 { - LaneId = 0x00, - VirtCfg = 0x02, - VirtId = 0x03, - Pm0 = 0x04, - Pm1 = 0x05, - Pm2 = 0x06, - Pm3 = 0x07, - Pm4 = 0x08, - Pm5 = 0x09, - Pm6 = 0x0a, - Pm7 = 0x0b, - OrderingTicket = 0x0f, - PrimType = 0x10, - InvocationId = 0x11, - Ydirection = 0x12, - ThreadKill = 0x13, - ShaderType = 0x14, - DirectBeWriteAddressLow = 0x15, - DirectBeWriteAddressHigh = 0x16, - DirectBeWriteEnabled = 0x17, - MachineId0 = 0x18, - MachineId1 = 0x19, - MachineId2 = 0x1a, - MachineId3 = 0x1b, - Affinity = 0x1c, - InvocationInfo = 0x1d, - WscaleFactorXY = 0x1e, - WscaleFactorZ = 0x1f, - Tid = 0x20, - TidX = 0x21, - TidY = 0x22, - TidZ = 0x23, - CtaParam = 0x24, - CtaIdX = 0x25, - CtaIdY = 0x26, - CtaIdZ = 0x27, - NtId = 0x28, - CirQueueIncrMinusOne = 0x29, - Nlatc = 0x2a, - SmSpaVersion = 0x2c, - MultiPassShaderInfo = 0x2d, - LwinHi = 0x2e, - SwinHi = 0x2f, - SwinLo = 0x30, - SwinSz = 0x31, - SmemSz = 0x32, - SmemBanks = 0x33, - LwinLo = 0x34, - LwinSz = 0x35, - LmemLosz = 0x36, - LmemHioff = 0x37, - EqMask = 0x38, - LtMask = 0x39, - LeMask = 0x3a, - GtMask = 0x3b, - GeMask = 0x3c, - RegAlloc = 0x3d, - CtxAddr = 0x3e, // .fmask = F_SM50 - BarrierAlloc = 0x3e, // .fmask = F_SM60 - GlobalErrorStatus = 0x40, - WarpErrorStatus = 0x42, - WarpErrorStatusClear = 0x43, - PmHi0 = 0x48, - PmHi1 = 0x49, - PmHi2 = 0x4a, - PmHi3 = 0x4b, - PmHi4 = 0x4c, - PmHi5 = 0x4d, - PmHi6 = 0x4e, - PmHi7 = 0x4f, - ClockLo = 0x50, - ClockHi = 0x51, - GlobalTimerLo = 0x52, - GlobalTimerHi = 0x53, - HwTaskId = 0x60, - CircularQueueEntryIndex = 0x61, - CircularQueueEntryAddressLow = 0x62, - CircularQueueEntryAddressHigh = 0x63, -}; - -enum class PhysicalAttributeDirection : u64 { - Input = 0, - Output = 1, -}; - -enum class VoteOperation : u64 { - All = 0, // allThreadsNV - Any = 1, // anyThreadNV - Eq = 2, // allThreadsEqualNV -}; - -enum class ImageAtomicOperationType : u64 { - U32 = 0, - S32 = 1, - U64 = 2, - F32 = 3, - S64 = 5, - SD32 = 6, - SD64 = 7, -}; - -enum class ImageAtomicOperation : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, -}; - -enum class ShuffleOperation : u64 { - Idx = 0, // shuffleNV - Up = 1, // shuffleUpNV - Down = 2, // shuffleDownNV - Bfly = 3, // shuffleXorNV -}; - -enum class ShfType : u64 { - Bits32 = 0, - U64 = 2, - S64 = 3, -}; - -enum class ShfXmode : u64 { - None = 0, - HI = 1, - X = 2, - XHI = 3, -}; - -union Instruction { - constexpr Instruction& operator=(const Instruction& instr) { - value = instr.value; - return *this; - } - - constexpr Instruction(u64 value_) : value{value_} {} - constexpr Instruction(const Instruction& instr) : value(instr.value) {} - - [[nodiscard]] constexpr bool Bit(u64 offset) const { - return ((value >> offset) & 1) != 0; - } - - BitField<0, 8, Register> gpr0; - BitField<8, 8, Register> gpr8; - union { - BitField<16, 4, Pred> full_pred; - BitField<16, 3, u64> pred_index; - } pred; - BitField<19, 1, u64> negate_pred; - BitField<20, 8, Register> gpr20; - BitField<20, 4, SubOp> sub_op; - BitField<28, 8, Register> gpr28; - BitField<39, 8, Register> gpr39; - BitField<48, 16, u64> opcode; - - union { - BitField<8, 5, ConditionCode> cc; - BitField<13, 1, u64> trigger; - } nop; - - union { - BitField<48, 2, VoteOperation> operation; - BitField<45, 3, u64> dest_pred; - BitField<39, 3, u64> value; - BitField<42, 1, u64> negate_value; - } vote; - - union { - BitField<30, 2, ShuffleOperation> operation; - BitField<48, 3, u64> pred48; - BitField<28, 1, u64> is_index_imm; - BitField<29, 1, u64> is_mask_imm; - BitField<20, 5, u64> index_imm; - BitField<34, 13, u64> mask_imm; - } shfl; - - union { - BitField<44, 1, u64> ftz; - BitField<39, 2, u64> tab5cb8_2; - BitField<38, 1, u64> ndv; - BitField<47, 1, u64> cc; - BitField<28, 8, u64> swizzle; - } fswzadd; - - union { - BitField<8, 8, Register> gpr; - BitField<20, 24, s64> offset; - } gmem; - - union { - BitField<20, 16, u64> imm20_16; - BitField<20, 19, u64> imm20_19; - BitField<20, 32, s64> imm20_32; - BitField<45, 1, u64> negate_b; - BitField<46, 1, u64> abs_a; - BitField<48, 1, u64> negate_a; - BitField<49, 1, u64> abs_b; - BitField<50, 1, u64> saturate_d; - BitField<56, 1, u64> negate_imm; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - } fmnmx; - - union { - BitField<39, 1, u64> invert_a; - BitField<40, 1, u64> invert_b; - BitField<41, 2, LogicOperation> operation; - BitField<44, 2, PredicateResultMode> pred_result_mode; - BitField<48, 3, Pred> pred48; - } lop; - - union { - BitField<53, 2, LogicOperation> operation; - BitField<55, 1, u64> invert_a; - BitField<56, 1, u64> invert_b; - } lop32i; - - union { - BitField<28, 8, u64> imm_lut28; - BitField<48, 8, u64> imm_lut48; - - [[nodiscard]] u32 GetImmLut28() const { - return static_cast(imm_lut28); - } - - [[nodiscard]] u32 GetImmLut48() const { - return static_cast(imm_lut48); - } - } lop3; - - [[nodiscard]] u16 GetImm20_16() const { - return static_cast(imm20_16); - } - - [[nodiscard]] u32 GetImm20_19() const { - u32 imm{static_cast(imm20_19)}; - imm <<= 12; - imm |= negate_imm ? 0x80000000 : 0; - return imm; - } - - [[nodiscard]] u32 GetImm20_32() const { - return static_cast(imm20_32); - } - - [[nodiscard]] s32 GetSignedImm20_20() const { - const auto immediate = static_cast(imm20_19 | (negate_imm << 19)); - // Sign extend the 20-bit value. - const auto mask = 1U << (20 - 1); - return static_cast((immediate ^ mask) - mask); - } - } alu; - - union { - BitField<38, 1, u64> idx; - BitField<51, 1, u64> saturate; - BitField<52, 2, IpaSampleMode> sample_mode; - BitField<54, 2, IpaInterpMode> interp_mode; - } ipa; - - union { - BitField<39, 2, u64> tab5cb8_2; - BitField<41, 3, u64> postfactor; - BitField<44, 2, u64> tab5c68_0; - BitField<48, 1, u64> negate_b; - } fmul; - - union { - BitField<55, 1, u64> saturate; - } fmul32; - - union { - BitField<52, 1, u64> generates_cc; - } op_32; - - union { - BitField<48, 1, u64> is_signed; - } shift; - - union { - BitField<39, 1, u64> wrap; - } shr; - - union { - BitField<37, 2, ShfType> type; - BitField<48, 2, ShfXmode> xmode; - BitField<50, 1, u64> wrap; - BitField<20, 6, u64> immediate; - } shf; - - union { - BitField<39, 5, u64> shift_amount; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_a; - } alu_integer; - - union { - BitField<43, 1, u64> x; - } iadd; - - union { - BitField<39, 1, u64> ftz; - BitField<32, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - - BitField<35, 2, HalfType> type_c; - } alu_half; - - union { - BitField<39, 2, HalfPrecision> precision; - BitField<39, 1, u64> ftz; - BitField<52, 1, u64> saturate; - BitField<49, 2, HalfMerge> merge; - - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - } alu_half_imm; - - union { - BitField<29, 1, u64> first_negate; - BitField<20, 9, u64> first; - - BitField<56, 1, u64> second_negate; - BitField<30, 9, u64> second; - - [[nodiscard]] u32 PackImmediates() const { - // Immediates are half floats shifted. - constexpr u32 imm_shift = 6; - return static_cast((first << imm_shift) | (second << (16 + imm_shift))); - } - } half_imm; - - union { - union { - BitField<37, 2, HalfPrecision> precision; - BitField<32, 1, u64> saturate; - - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> negate_c; - BitField<35, 2, HalfType> type_c; - } rr; - - BitField<57, 2, HalfPrecision> precision; - BitField<52, 1, u64> saturate; - - BitField<49, 2, HalfMerge> merge; - - BitField<47, 2, HalfType> type_a; - - BitField<56, 1, u64> negate_b; - BitField<28, 2, HalfType> type_b; - - BitField<51, 1, u64> negate_c; - BitField<53, 2, HalfType> type_reg39; - } hfma2; - - union { - BitField<40, 1, u64> invert; - } popc; - - union { - BitField<41, 1, u64> sh; - BitField<40, 1, u64> invert; - BitField<48, 1, u64> is_signed; - } flo; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> neg_pred; - } sel; - - union { - BitField<39, 3, u64> pred; - BitField<42, 1, u64> negate_pred; - BitField<43, 2, IMinMaxExchange> exchange; - BitField<48, 1, u64> is_signed; - } imnmx; - - union { - BitField<31, 2, IAdd3Height> height_c; - BitField<33, 2, IAdd3Height> height_b; - BitField<35, 2, IAdd3Height> height_a; - BitField<37, 2, IAdd3Mode> mode; - BitField<49, 1, u64> neg_c; - BitField<50, 1, u64> neg_b; - BitField<51, 1, u64> neg_a; - } iadd3; - - union { - BitField<54, 1, u64> saturate; - BitField<56, 1, u64> negate_a; - } iadd32i; - - union { - BitField<53, 1, u64> negate_b; - BitField<54, 1, u64> abs_a; - BitField<56, 1, u64> negate_a; - BitField<57, 1, u64> abs_b; - } fadd32i; - - union { - BitField<40, 1, u64> brev; - BitField<47, 1, u64> rd_cc; - BitField<48, 1, u64> is_signed; - } bfe; - - union { - BitField<48, 3, u64> pred48; - - union { - BitField<20, 20, u64> entry_a; - BitField<39, 5, u64> entry_b; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } imm; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<56, 1, u64> neg; - BitField<57, 1, u64> uses_cc; - } hi; - - union { - BitField<20, 14, u64> cb_index; - BitField<34, 5, u64> cb_offset; - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } rz; - - union { - BitField<39, 5, u64> entry_a; - BitField<45, 1, u64> neg; - BitField<46, 1, u64> uses_cc; - } r1; - - union { - BitField<28, 8, u64> entry_a; - BitField<37, 1, u64> neg; - BitField<38, 1, u64> uses_cc; - } r2; - - } lea; - - union { - BitField<0, 5, FlowCondition> cond; - } flow; - - union { - BitField<47, 1, u64> cc; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_c; - BitField<51, 2, u64> tab5980_1; - BitField<53, 2, u64> tab5980_0; - } ffma; - - union { - BitField<48, 3, UniformType> type; - BitField<44, 2, u64> unknown; - } ld_c; - - union { - BitField<48, 3, StoreType> type; - } ldst_sl; - - union { - BitField<44, 2, u64> unknown; - } ld_l; - - union { - BitField<44, 2, StoreCacheManagement> cache_management; - } st_l; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } ldg; - - union { - BitField<48, 3, UniformType> type; - BitField<46, 2, u64> cache_mode; - } stg; - - union { - BitField<23, 3, AtomicOp> operation; - BitField<48, 1, u64> extended; - BitField<20, 3, GlobalAtomicType> type; - } red; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<49, 3, GlobalAtomicType> type; - BitField<28, 20, s64> offset; - } atom; - - union { - BitField<52, 4, AtomicOp> operation; - BitField<28, 2, AtomicType> type; - BitField<30, 22, s64> offset; - - [[nodiscard]] s32 GetImmediateOffset() const { - return static_cast(offset << 2); - } - } atoms; - - union { - BitField<32, 1, PhysicalAttributeDirection> direction; - BitField<47, 3, AttributeSize> size; - BitField<20, 11, u64> address; - } al2p; - - union { - BitField<53, 3, UniformType> type; - BitField<52, 1, u64> extended; - } generic; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<6, 1, u64> neg_b; - BitField<7, 1, u64> abs_a; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fsetp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } isetp; - - union { - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } icmp; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 2, PredOperation> op; - } psetp; - - union { - BitField<43, 4, PredCondition> cond; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<39, 3, u64> pred39; - } vsetp; - - union { - BitField<12, 3, u64> pred12; - BitField<15, 1, u64> neg_pred12; - BitField<24, 2, PredOperation> cond; - BitField<29, 3, u64> pred29; - BitField<32, 1, u64> neg_pred29; - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - } pset; - - union { - BitField<0, 3, u64> pred0; - BitField<3, 3, u64> pred3; - BitField<8, 5, ConditionCode> cc; // flag in cc - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred39; - BitField<45, 4, PredOperation> op; // op with pred39 - } csetp; - - union { - BitField<6, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<3, 3, u64> pred3; - BitField<0, 3, u64> pred0; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - union { - BitField<35, 4, PredCondition> cond; - BitField<49, 1, u64> h_and; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - } reg; - union { - BitField<56, 1, u64> negate_b; - BitField<54, 1, u64> abs_b; - } cbuf; - union { - BitField<49, 4, PredCondition> cond; - BitField<53, 1, u64> h_and; - } cbuf_and_imm; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hsetp2; - - union { - BitField<40, 1, R2pMode> mode; - BitField<41, 2, u64> byte; - BitField<20, 7, u64> immediate_mask; - } p2r_r2p; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<43, 1, u64> neg_a; - BitField<44, 1, u64> abs_b; - BitField<45, 2, PredOperation> op; - BitField<48, 4, PredCondition> cond; - BitField<52, 1, u64> bf; - BitField<53, 1, u64> neg_b; - BitField<54, 1, u64> abs_a; - BitField<55, 1, u64> ftz; - } fset; - - union { - BitField<47, 1, u64> ftz; - BitField<48, 4, PredCondition> cond; - } fcmp; - - union { - BitField<49, 1, u64> bf; - BitField<35, 3, PredCondition> cond; - BitField<50, 1, u64> ftz; - BitField<45, 2, PredOperation> op; - BitField<43, 1, u64> negate_a; - BitField<44, 1, u64> abs_a; - BitField<47, 2, HalfType> type_a; - BitField<31, 1, u64> negate_b; - BitField<30, 1, u64> abs_b; - BitField<28, 2, HalfType> type_b; - BitField<42, 1, u64> neg_pred; - BitField<39, 3, u64> pred39; - } hset2; - - union { - BitField<39, 3, u64> pred39; - BitField<42, 1, u64> neg_pred; - BitField<44, 1, u64> bf; - BitField<45, 2, PredOperation> op; - BitField<48, 1, u64> is_signed; - BitField<49, 3, PredCondition> cond; - } iset; - - union { - BitField<45, 1, u64> negate_a; - BitField<49, 1, u64> abs_a; - BitField<10, 2, Register::Size> src_size; - BitField<13, 1, u64> is_input_signed; - BitField<8, 2, Register::Size> dst_size; - BitField<12, 1, u64> is_output_signed; - - union { - BitField<39, 2, u64> tab5cb8_2; - } i2f; - - union { - BitField<39, 2, F2iRoundingOp> rounding; - } f2i; - - union { - BitField<39, 4, u64> rounding; - // H0, H1 extract for F16 missing - BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value - [[nodiscard]] F2fRoundingOp GetRoundingMode() const { - constexpr u64 rounding_mask = 0x0B; - return static_cast(rounding.Value() & rounding_mask); - } - } f2f; - - union { - BitField<41, 2, u64> selector; - } int_src; - - union { - BitField<41, 1, u64> selector; - } float_src; - } conversion; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 1, u64> aoffi_flag; - BitField<55, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<36, 1, u64> aoffi_flag; - BitField<37, 3, TextureProcessMode> process_mode; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tex_b; - - union { - BitField<22, 6, TextureQueryType> query_type; - BitField<31, 4, u64> component_mask; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - } txq; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<31, 4, u64> component_mask; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - return ((1ULL << component) & component_mask) != 0; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return (ndv_flag != 0); - case TextureMiscMode::NODEP: - return (nodep_flag != 0); - default: - break; - } - return false; - } - } tmml; - - union { - BitField<28, 1, u64> array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<54, 2, u64> offset_mode; - BitField<56, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4; - - union { - BitField<35, 1, u64> ndv_flag; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<33, 2, u64> offset_mode; - BitField<37, 2, u64> component; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::NDV: - return ndv_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::AOFFI: - return offset_mode == 1; - case TextureMiscMode::PTP: - return offset_mode == 2; - default: - break; - } - return false; - } - } tld4_b; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> dc_flag; - BitField<51, 1, u64> aoffi_flag; - BitField<52, 2, u64> component; - BitField<55, 1, u64> fp16_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return dc_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - default: - break; - } - return false; - } - } tld4s; - - union { - BitField<0, 8, Register> gpr0; - BitField<28, 8, Register> gpr28; - BitField<49, 1, u64> nodep_flag; - BitField<50, 3, u64> component_mask_selector; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TEXS instruction has a weird encoding for the texture type. - if (texture_info == 0) { - return TextureType::Texture1D; - } - if (texture_info >= 1 && texture_info <= 9) { - return TextureType::Texture2D; - } - if (texture_info >= 10 && texture_info <= 11) { - return TextureType::Texture3D; - } - if (texture_info >= 12 && texture_info <= 13) { - return TextureType::TextureCube; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - switch (texture_info) { - case 0: - case 2: - case 6: - case 8: - case 9: - case 11: - return TextureProcessMode::LZ; - case 3: - case 5: - case 13: - return TextureProcessMode::LL; - default: - break; - } - return TextureProcessMode::None; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::DC: - return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info >= 7 && texture_info <= 9; - } - - [[nodiscard]] bool HasTwoDestinations() const { - return gpr28.Value() != Register::ZeroIndex; - } - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - static constexpr std::array, 4> mask_lut{{ - {}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x7, 0xb, 0xd, 0xe, 0xf}, - }}; - - std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; - index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; - - u32 mask = mask_lut[index][component_mask_selector]; - // A mask of 0 means this instruction uses an unimplemented mask. - ASSERT(mask != 0); - return ((1ull << component) & mask) != 0; - } - } texs; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi; - BitField<49, 1, u64> nodep_flag; - BitField<50, 1, u64> ms; // Multisample? - BitField<54, 1, u64> cl; - BitField<55, 1, u64> process_mode; - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; - } - } tld; - - union { - BitField<49, 1, u64> nodep_flag; - BitField<53, 4, u64> texture_info; - BitField<59, 1, u64> fp32_flag; - - [[nodiscard]] TextureType GetTextureType() const { - // The TLDS instruction has a weird encoding for the texture type. - if (texture_info <= 1) { - return TextureType::Texture1D; - } - if (texture_info == 2 || texture_info == 8 || texture_info == 12 || - (texture_info >= 4 && texture_info <= 6)) { - return TextureType::Texture2D; - } - if (texture_info == 7) { - return TextureType::Texture3D; - } - - LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value()); - UNREACHABLE(); - return TextureType::Texture1D; - } - - [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { - if (texture_info == 1 || texture_info == 5 || texture_info == 12) { - return TextureProcessMode::LL; - } - return TextureProcessMode::LZ; - } - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return texture_info == 12 || texture_info == 4; - case TextureMiscMode::MZ: - return texture_info == 5; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - [[nodiscard]] bool IsArrayTexture() const { - // TEXS only supports Texture2D arrays. - return texture_info == 8; - } - } tlds; - - union { - BitField<28, 1, u64> is_array; - BitField<29, 2, TextureType> texture_type; - BitField<35, 1, u64> aoffi_flag; - BitField<49, 1, u64> nodep_flag; - - [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { - switch (mode) { - case TextureMiscMode::AOFFI: - return aoffi_flag != 0; - case TextureMiscMode::NODEP: - return nodep_flag != 0; - default: - break; - } - return false; - } - - } txd; - - union { - BitField<24, 2, StoreCacheManagement> cache_management; - BitField<33, 3, ImageType> image_type; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - BitField<51, 1, u64> is_immediate; - BitField<52, 1, SurfaceDataMode> mode; - - BitField<20, 3, StoreType> store_data_layout; - BitField<20, 4, u64> component_mask_selector; - - [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { - ASSERT(mode == SurfaceDataMode::P); - constexpr u8 R = 0b0001; - constexpr u8 G = 0b0010; - constexpr u8 B = 0b0100; - constexpr u8 A = 0b1000; - constexpr std::array mask = { - 0, (R), (G), (R | G), (B), (R | B), - (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), - (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; - return std::bitset<4>{mask.at(component_mask_selector)}.test(component); - } - - [[nodiscard]] StoreType GetStoreDataLayout() const { - ASSERT(mode == SurfaceDataMode::D_BA); - return store_data_layout; - } - } suldst; - - union { - BitField<28, 1, u64> is_ba; - BitField<51, 3, ImageAtomicOperationType> operation_type; - BitField<33, 3, ImageType> image_type; - BitField<29, 4, ImageAtomicOperation> operation; - BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; - } suatom_d; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchTarget() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast(target); - constexpr auto instruction_size = static_cast(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast((target_value ^ mask) - mask) / instruction_size + 1; - } - } bra; - - union { - BitField<20, 24, u64> target; - BitField<5, 1, u64> constant_buffer; - - [[nodiscard]] s32 GetBranchExtend() const { - // Sign extend the branch target offset - const auto mask = 1U << (24 - 1); - const auto target_value = static_cast(target); - constexpr auto instruction_size = static_cast(sizeof(Instruction)); - - // The branch offset is relative to the next instruction and is stored in bytes, so - // divide it by the size of an instruction and add 1 to it. - return static_cast((target_value ^ mask) - mask) / instruction_size + 1; - } - } brx; - - union { - BitField<39, 1, u64> emit; // EmitVertex - BitField<40, 1, u64> cut; // EndPrimitive - } out; - - union { - BitField<31, 1, u64> skew; - BitField<32, 1, u64> o; - BitField<33, 2, IsberdMode> mode; - BitField<47, 2, IsberdShift> shift; - } isberd; - - union { - BitField<8, 2, MembarType> type; - BitField<0, 2, MembarUnknown> unknown; - } membar; - - union { - BitField<48, 1, u64> signed_a; - BitField<38, 1, u64> is_byte_chunk_a; - BitField<36, 2, VideoType> type_a; - BitField<36, 2, u64> byte_height_a; - - BitField<49, 1, u64> signed_b; - BitField<50, 1, u64> use_register_b; - BitField<30, 1, u64> is_byte_chunk_b; - BitField<28, 2, VideoType> type_b; - BitField<28, 2, u64> byte_height_b; - } video; - - union { - BitField<51, 2, VmadShr> shr; - BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) - BitField<47, 1, u64> cc; - } vmad; - - union { - BitField<54, 1, u64> is_dest_signed; - BitField<48, 1, u64> is_src_a_signed; - BitField<49, 1, u64> is_src_b_signed; - BitField<37, 2, u64> src_format_a; - BitField<29, 2, u64> src_format_b; - BitField<56, 1, u64> mx; - BitField<55, 1, u64> sat; - BitField<36, 2, u64> selector_a; - BitField<28, 2, u64> selector_b; - BitField<50, 1, u64> is_op_b_register; - BitField<51, 3, VmnmxOperation> operation; - - [[nodiscard]] VmnmxType SourceFormatA() const { - switch (src_format_a) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - - [[nodiscard]] VmnmxType SourceFormatB() const { - switch (src_format_b) { - case 0b11: - return VmnmxType::Bits32; - case 0b10: - return VmnmxType::Bits16; - default: - return VmnmxType::Bits8; - } - } - } vmnmx; - - union { - BitField<20, 16, u64> imm20_16; - BitField<35, 1, u64> high_b_rr; // used on RR - BitField<36, 1, u64> product_shift_left; - BitField<37, 1, u64> merge_37; - BitField<48, 1, u64> sign_a; - BitField<49, 1, u64> sign_b; - BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC - BitField<50, 3, XmadMode> mode; - BitField<52, 1, u64> high_b; - BitField<53, 1, u64> high_a; - BitField<55, 1, u64> product_shift_left_second; // used on CR - BitField<56, 1, u64> merge_56; - } xmad; - - union { - BitField<20, 14, u64> shifted_offset; - BitField<34, 5, u64> index; - - [[nodiscard]] u64 GetOffset() const { - return shifted_offset * 4; - } - } cbuf34; - - union { - BitField<20, 16, s64> offset; - BitField<36, 5, u64> index; - - [[nodiscard]] s64 GetOffset() const { - return offset; - } - } cbuf36; - - // Unsure about the size of this one. - // It's always used with a gpr0, so any size should be fine. - BitField<20, 8, SystemVariable> sys20; - - BitField<47, 1, u64> generates_cc; - BitField<61, 1, u64> is_b_imm; - BitField<60, 1, u64> is_b_gpr; - BitField<59, 1, u64> is_c_gpr; - BitField<20, 24, s64> smem_imm; - BitField<0, 5, ConditionCode> flow_condition_code; - - Attribute attribute; - Sampler sampler; - Image image; - - u64 value; -}; -static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); -static_assert(std::is_standard_layout_v, "Instruction is not standard layout"); - -class OpCode { -public: - enum class Id { - KIL, - SSY, - SYNC, - BRK, - DEPBAR, - VOTE, - VOTE_VTG, - SHFL, - FSWZADD, - BFE_C, - BFE_R, - BFE_IMM, - BFI_RC, - BFI_IMM_R, - BRA, - BRX, - PBK, - LD_A, - LD_L, - LD_S, - LD_C, - LD, // Load from generic memory - LDG, // Load from global memory - ST_A, - ST_L, - ST_S, - ST, // Store in generic memory - STG, // Store in global memory - RED, // Reduction operation - ATOM, // Atomic operation on global memory - ATOMS, // Atomic operation on shared memory - AL2P, // Transforms attribute memory into physical memory - TEX, - TEX_B, // Texture Load Bindless - TXQ, // Texture Query - TXQ_B, // Texture Query Bindless - TEXS, // Texture Fetch with scalar/non-vec4 source/destinations - TLD, // Texture Load - TLDS, // Texture Load with scalar/non-vec4 source/destinations - TLD4, // Texture Gather 4 - TLD4_B, // Texture Gather 4 Bindless - TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations - TMML_B, // Texture Mip Map Level - TMML, // Texture Mip Map Level - TXD, // Texture Gradient/Load with Derivates - TXD_B, // Texture Gradient/Load with Derivates Bindless - SUST, // Surface Store - SULD, // Surface Load - SUATOM, // Surface Atomic Operation - EXIT, - NOP, - IPA, - OUT_R, // Emit vertex/primitive - ISBERD, - BAR, - MEMBAR, - VMAD, - VSETP, - VMNMX, - FFMA_IMM, // Fused Multiply and Add - FFMA_CR, - FFMA_RC, - FFMA_RR, - FADD_C, - FADD_R, - FADD_IMM, - FADD32I, - FMUL_C, - FMUL_R, - FMUL_IMM, - FMUL32_IMM, - IADD_C, - IADD_R, - IADD_IMM, - IADD3_C, // Add 3 Integers - IADD3_R, - IADD3_IMM, - IADD32I, - ISCADD_C, // Scale and Add - ISCADD_R, - ISCADD_IMM, - FLO_R, - FLO_C, - FLO_IMM, - LEA_R1, - LEA_R2, - LEA_RZ, - LEA_IMM, - LEA_HI, - HADD2_C, - HADD2_R, - HADD2_IMM, - HMUL2_C, - HMUL2_R, - HMUL2_IMM, - HFMA2_CR, - HFMA2_RC, - HFMA2_RR, - HFMA2_IMM_R, - HSETP2_C, - HSETP2_R, - HSETP2_IMM, - HSET2_C, - HSET2_R, - HSET2_IMM, - POPC_C, - POPC_R, - POPC_IMM, - SEL_C, - SEL_R, - SEL_IMM, - ICMP_RC, - ICMP_R, - ICMP_CR, - ICMP_IMM, - FCMP_RR, - FCMP_RC, - FCMP_IMMR, - MUFU, // Multi-Function Operator - RRO_C, // Range Reduction Operator - RRO_R, - RRO_IMM, - F2F_C, - F2F_R, - F2F_IMM, - F2I_C, - F2I_R, - F2I_IMM, - I2F_C, - I2F_R, - I2F_IMM, - I2I_C, - I2I_R, - I2I_IMM, - LOP_C, - LOP_R, - LOP_IMM, - LOP32I, - LOP3_C, - LOP3_R, - LOP3_IMM, - MOV_C, - MOV_R, - MOV_IMM, - S2R, - MOV32_IMM, - SHL_C, - SHL_R, - SHL_IMM, - SHR_C, - SHR_R, - SHR_IMM, - SHF_RIGHT_R, - SHF_RIGHT_IMM, - SHF_LEFT_R, - SHF_LEFT_IMM, - FMNMX_C, - FMNMX_R, - FMNMX_IMM, - IMNMX_C, - IMNMX_R, - IMNMX_IMM, - FSETP_C, // Set Predicate - FSETP_R, - FSETP_IMM, - FSET_C, - FSET_R, - FSET_IMM, - ISETP_C, - ISETP_IMM, - ISETP_R, - ISET_R, - ISET_C, - ISET_IMM, - PSETP, - PSET, - CSETP, - R2P_IMM, - P2R_IMM, - XMAD_IMM, - XMAD_CR, - XMAD_RC, - XMAD_RR, - }; - - enum class Type { - Trivial, - Arithmetic, - ArithmeticImmediate, - ArithmeticInteger, - ArithmeticIntegerImmediate, - ArithmeticHalf, - ArithmeticHalfImmediate, - Bfe, - Bfi, - Shift, - Ffma, - Hfma2, - Flow, - Synch, - Warp, - Memory, - Texture, - Image, - FloatSet, - FloatSetPredicate, - IntegerSet, - IntegerSetPredicate, - HalfSet, - HalfSetPredicate, - PredicateSetPredicate, - PredicateSetRegister, - RegisterSetPredicate, - Conversion, - Video, - Xmad, - Unknown, - }; - - /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be - /// conditionally executed). - [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) { - // TODO(Subv): Add the rest of unpredicated instructions. - return opcode != Id::SSY && opcode != Id::PBK; - } - - class Matcher { - public: - constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) - : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} - - [[nodiscard]] constexpr const char* GetName() const { - return name; - } - - [[nodiscard]] constexpr u16 GetMask() const { - return mask; - } - - [[nodiscard]] constexpr Id GetId() const { - return id; - } - - [[nodiscard]] constexpr Type GetType() const { - return type; - } - - /** - * Tests to see if the given instruction is the instruction this matcher represents. - * @param instruction The instruction to test - * @returns true if the given instruction matches. - */ - [[nodiscard]] constexpr bool Matches(u16 instruction) const { - return (instruction & mask) == expected; - } - - private: - const char* name; - u16 mask; - u16 expected; - Id id; - Type type; - }; - - using DecodeResult = std::optional>; - [[nodiscard]] static DecodeResult Decode(Instruction instr) { - static const auto table{GetDecodeTable()}; - - const auto matches_instruction = [instr](const auto& matcher) { - return matcher.Matches(static_cast(instr.opcode)); - }; - - auto iter = std::find_if(table.begin(), table.end(), matches_instruction); - return iter != table.end() ? std::optional>(*iter) - : std::nullopt; - } - -private: - struct Detail { - private: - static constexpr std::size_t opcode_bitsize = 16; - - /** - * Generates the mask and the expected value after masking from a given bitstring. - * A '0' in a bitstring indicates that a zero must be present at that bit position. - * A '1' in a bitstring indicates that a one must be present at that bit position. - */ - [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) { - u16 mask = 0, expect = 0; - for (std::size_t i = 0; i < opcode_bitsize; i++) { - const std::size_t bit_position = opcode_bitsize - i - 1; - switch (bitstring[i]) { - case '0': - mask |= static_cast(1U << bit_position); - break; - case '1': - expect |= static_cast(1U << bit_position); - mask |= static_cast(1U << bit_position); - break; - default: - // Ignore - break; - } - } - return std::make_pair(mask, expect); - } - - public: - /// Creates a matcher that can match and parse instructions based on bitstring. - [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op, - Type type, const char* const name) { - const auto [mask, expected] = GetMaskAndExpect(bitstring); - return Matcher(name, mask, expected, op, type); - } - }; - - [[nodiscard]] static std::vector GetDecodeTable() { - std::vector table = { -#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) - INST("111000110011----", Id::KIL, Type::Flow, "KIL"), - INST("111000101001----", Id::SSY, Type::Flow, "SSY"), - INST("111000101010----", Id::PBK, Type::Flow, "PBK"), - INST("111000100100----", Id::BRA, Type::Flow, "BRA"), - INST("111000100101----", Id::BRX, Type::Flow, "BRX"), - INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), - INST("111000110100----", Id::BRK, Type::Flow, "BRK"), - INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), - INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), - INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), - INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), - INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), - INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), - INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), - INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), - INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), - INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), - INST("100-------------", Id::LD, Type::Memory, "LD"), - INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), - INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), - INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), - INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), - INST("101-------------", Id::ST, Type::Memory, "ST"), - INST("1110111011011---", Id::STG, Type::Memory, "STG"), - INST("1110101111111---", Id::RED, Type::Memory, "RED"), - INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), - INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), - INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), - INST("110000----111---", Id::TEX, Type::Texture, "TEX"), - INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), - INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), - INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), - INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), - INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), - INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), - INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), - INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"), - INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"), - INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), - INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), - INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"), - INST("11011110001110--", Id::TXD, Type::Texture, "TXD"), - INST("11101011001-----", Id::SUST, Type::Image, "SUST"), - INST("11101011000-----", Id::SULD, Type::Image, "SULD"), - INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), - INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), - INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), - INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), - INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), - INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"), - INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), - INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), - INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), - INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), - INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), - INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), - INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), - INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"), - INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"), - INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"), - INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"), - INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"), - INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"), - INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"), - INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), - INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"), - INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"), - INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"), - INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"), - INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"), - INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"), - INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"), - INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"), - INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"), - INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"), - INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"), - INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"), - INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"), - INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"), - INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"), - INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"), - INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"), - INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"), - INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"), - INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"), - INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"), - INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"), - INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"), - INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"), - INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"), - INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"), - INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), - INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), - INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), - INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), - INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), - INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"), - INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), - INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), - INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"), - INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), - INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), - INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), - INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), - INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), - INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), - INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), - INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), - INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), - INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), - INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), - INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), - INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"), - INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), - INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), - INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), - INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"), - INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), - INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), - INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), - INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"), - INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"), - INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"), - INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), - INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), - INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), - INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), - INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), - INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), - INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), - INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"), - INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"), - INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"), - INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"), - INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"), - INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"), - INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"), - INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"), - INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"), - INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"), - INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"), - INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"), - INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"), - INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"), - INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"), - INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"), - INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"), - INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"), - INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"), - INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"), - INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"), - INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), - INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"), - INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"), - INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"), - INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), - INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), - INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), - INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), - INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), - INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), - INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), - INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"), - INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"), - INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"), - INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"), - INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"), - INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"), - INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), - INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), - INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), - INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"), - INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), - INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), - INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), - INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), - INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"), - INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"), - INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), - INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), - INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), - INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), - }; -#undef INST - std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { - // If a matcher has more bits in its mask it is more specific, so it - // should come first. - return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count(); - }); - - return table; - } -}; - -} // namespace Tegra::Shader diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h deleted file mode 100644 index e0d7b89c5..000000000 --- a/src/video_core/engines/shader_header.h +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "common/bit_field.h" -#include "common/common_funcs.h" -#include "common/common_types.h" - -namespace Tegra::Shader { - -enum class OutputTopology : u32 { - PointList = 1, - LineStrip = 6, - TriangleStrip = 7, -}; - -enum class PixelImap : u8 { - Unused = 0, - Constant = 1, - Perspective = 2, - ScreenLinear = 3, -}; - -// Documentation in: -// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html -struct Header { - union { - BitField<0, 5, u32> sph_type; - BitField<5, 5, u32> version; - BitField<10, 4, u32> shader_type; - BitField<14, 1, u32> mrt_enable; - BitField<15, 1, u32> kills_pixels; - BitField<16, 1, u32> does_global_store; - BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; - BitField<26, 1, u32> does_load_or_store; - BitField<27, 1, u32> does_fp64; - BitField<28, 4, u32> stream_out_mask; - } common0; - - union { - BitField<0, 24, u32> shader_local_memory_low_size; - BitField<24, 8, u32> per_patch_attribute_count; - } common1; - - union { - BitField<0, 24, u32> shader_local_memory_high_size; - BitField<24, 8, u32> threads_per_input_primitive; - } common2; - - union { - BitField<0, 24, u32> shader_local_memory_crs_size; - BitField<24, 4, OutputTopology> output_topology; - BitField<28, 4, u32> reserved; - } common3; - - union { - BitField<0, 12, u32> max_output_vertices; - BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. - BitField<20, 4, u32> reserved; - BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. - } common4; - - union { - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - union { - BitField<0, 8, u16> clip_distances; - BitField<8, 1, u16> point_sprite_s; - BitField<9, 1, u16> point_sprite_t; - BitField<10, 1, u16> fog_coordinate; - BitField<12, 1, u16> tessellation_eval_point_u; - BitField<13, 1, u16> tessellation_eval_point_v; - BitField<14, 1, u16> instance_id; - BitField<15, 1, u16> vertex_id; - }; - INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved - INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB - INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32] - INSERT_PADDING_BYTES_NOINIT(2); // OmapColor - INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - } vtg; - - struct { - INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA - INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB - - union { - BitField<0, 2, PixelImap> x; - BitField<2, 2, PixelImap> y; - BitField<4, 2, PixelImap> z; - BitField<6, 2, PixelImap> w; - u8 raw; - } imap_generic_vector[32]; - - INSERT_PADDING_BYTES_NOINIT(2); // ImapColor - INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC - INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] - INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved - - struct { - u32 target; - union { - BitField<0, 1, u32> sample_mask; - BitField<1, 1, u32> depth; - BitField<2, 30, u32> reserved; - }; - } omap; - - bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { - const u32 bit = render_target * 4 + component; - return omap.target & (1 << bit); - } - - PixelImap GetPixelImap(u32 attribute) const { - const auto get_index = [this, attribute](u32 index) { - return static_cast( - (imap_generic_vector[attribute].raw >> (index * 2)) & 3); - }; - - std::optional result; - for (u32 component = 0; component < 4; ++component) { - const PixelImap index = get_index(component); - if (index == PixelImap::Unused) { - continue; - } - if (result && result != index) { - LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); - } - result = index; - } - return result.value_or(PixelImap::Unused); - } - } ps; - - std::array raw; - }; - - u64 GetLocalMemorySize() const { - return (common1.shader_local_memory_low_size | - (common2.shader_local_memory_high_size << 24)); - } -}; -static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); - -} // namespace Tegra::Shader diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 7a3660496..588ce6139 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -4,6 +4,9 @@ #include +#include + +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" @@ -13,9 +16,142 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { +namespace { +vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) { + boost::container::small_vector bindings; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + bindings.push_back({ + .binding = binding, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); + ++binding; + } + return device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data(), + }); +} + +vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( + const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout, + VkPipelineLayout pipeline_layout) { + boost::container::small_vector entries; + size_t offset{}; + u32 binding{}; + for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { + entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = offset, + .stride = sizeof(DescriptorUpdateEntry), + }); + ++binding; + offset += sizeof(DescriptorUpdateEntry); + } + return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET, + .descriptorSetLayout = descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE, + .pipelineLayout = pipeline_layout, + .set = 0, + }); +} +} // Anonymous namespace + +ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue_, + const Shader::Info& info_, vk::ShaderModule spv_module_) + : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, + spv_module(std::move(spv_module_)), + descriptor_set_layout(CreateDescriptorSetLayout(device, info)), + descriptor_allocator(descriptor_pool, *descriptor_set_layout), + pipeline_layout{device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + })}, + descriptor_update_template{ + CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)}, + pipeline{device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + })} {} + +void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { + u32 enabled_uniforms{}; + for (const auto& desc : info.constant_buffer_descriptors) { + enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; + } + buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); -ComputePipeline::ComputePipeline() = default; + buffer_cache.UnbindComputeStorageBuffers(); + size_t index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true); + ++index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); +} -ComputePipeline::~ComputePipeline() = default; +VkDescriptorSet ComputePipeline::UpdateDescriptorSet() { + const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; + update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + return descriptor_set; +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 433d8bb3d..dc045d524 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -5,19 +5,52 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_pipeline.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { class Device; -class VKScheduler; -class VKUpdateDescriptorQueue; -class ComputePipeline { +class ComputePipeline : public Pipeline { public: - explicit ComputePipeline(); - ~ComputePipeline(); + explicit ComputePipeline() = default; + explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, + VKUpdateDescriptorQueue& update_descriptor_queue, + const Shader::Info& info, vk::ShaderModule spv_module); + + ComputePipeline& operator=(ComputePipeline&&) noexcept = default; + ComputePipeline(ComputePipeline&&) noexcept = default; + + ComputePipeline& operator=(const ComputePipeline&) = delete; + ComputePipeline(const ComputePipeline&) = delete; + + void ConfigureBufferCache(BufferCache& buffer_cache); + + [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); + + [[nodiscard]] VkPipeline Handle() const noexcept { + return *pipeline; + } + + [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept { + return *pipeline_layout; + } + +private: + VKUpdateDescriptorQueue* update_descriptor_queue; + Shader::Info info; + + vk::ShaderModule spv_module; + vk::DescriptorSetLayout descriptor_set_layout; + DescriptorAllocator descriptor_allocator; + vk::PipelineLayout pipeline_layout; + vk::DescriptorUpdateTemplateKHR descriptor_update_template; + vk::Pipeline pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ef9fb5910..3bea1ff44 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -19,9 +19,7 @@ constexpr std::size_t SETS_GROW_RATE = 0x20; DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, VkDescriptorSetLayout layout_) : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), - descriptor_pool{descriptor_pool_}, layout{layout_} {} - -DescriptorAllocator::~DescriptorAllocator() = default; + descriptor_pool{&descriptor_pool_}, layout{layout_} {} VkDescriptorSet DescriptorAllocator::Commit() { const std::size_t index = CommitResource(); @@ -29,7 +27,7 @@ VkDescriptorSet DescriptorAllocator::Commit() { } void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { - descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); + descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin)); } VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index f892be7be..2501f9967 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -17,8 +17,12 @@ class VKScheduler; class DescriptorAllocator final : public ResourcePool { public: + explicit DescriptorAllocator() = default; explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); - ~DescriptorAllocator() override; + ~DescriptorAllocator() override = default; + + DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; + DescriptorAllocator(DescriptorAllocator&&) noexcept = default; DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; DescriptorAllocator(const DescriptorAllocator&) = delete; @@ -29,8 +33,8 @@ protected: void Allocate(std::size_t begin, std::size_t end) override; private: - VKDescriptorPool& descriptor_pool; - const VkDescriptorSetLayout layout; + VKDescriptorPool* descriptor_pool{}; + VkDescriptorSetLayout layout{}; std::vector descriptors_allocations; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h new file mode 100644 index 000000000..b06288403 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_pipeline.h @@ -0,0 +1,36 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Pipeline { +public: + /// Add a reference count to the pipeline + void AddRef() noexcept { + ++ref_count; + } + + [[nodiscard]] bool RemoveRef() noexcept { + --ref_count; + return ref_count == 0; + } + + [[nodiscard]] u64 UsageTick() const noexcept { + return usage_tick; + } + +protected: + u64 usage_tick{}; + +private: + size_t ref_count{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7d0ba1180..4bf3e4819 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -12,6 +12,8 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" +#include "shader_recompiler/environment.h" +#include "shader_recompiler/recompiler.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -22,43 +24,105 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); using Tegra::Engines::ShaderType; namespace { -size_t StageFromProgram(size_t program) { - return program == 0 ? 0 : program - 1; -} +class Environment final : public Shader::Environment { +public: + explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_) + : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {} + + ~Environment() override = default; + + [[nodiscard]] std::optional Analyze(u32 start_address) { + const std::optional size{TryFindSize(start_address)}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast(*size); + return Common::CityHash128(reinterpret_cast(code.data()), code.size()); + } -ShaderType StageFromProgram(Maxwell::ShaderProgram program) { - return static_cast(StageFromProgram(static_cast(program))); -} + [[nodiscard]] size_t ShaderSize() const noexcept { + return read_highest - read_lowest + INST_SIZE; + } -ShaderType GetShaderType(Maxwell::ShaderProgram program) { - switch (program) { - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - default: - UNIMPLEMENTED_MSG("program={}", program); - return ShaderType::Vertex; + [[nodiscard]] u128 ComputeHash() const { + const size_t size{ShaderSize()}; + auto data = std::make_unique(size); + gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash128(reinterpret_cast(data.get()), size); } -} + + u64 ReadInstruction(u32 address) override { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[address / INST_SIZE]; + } + return gpu_memory.Read(program_base + address); + } + + std::array WorkgroupSize() override { + const auto& qmd{kepler_compute.launch_description}; + return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; + } + +private: + static constexpr size_t INST_SIZE = sizeof(u64); + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + std::optional TryFindSize(u32 start_address) { + GPUVAddr guest_addr = program_base + start_address; + size_t offset = 0; + size_t size = BLOCK_SIZE; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) { + const u64 inst = data[i / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + i; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; + } + + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + GPUVAddr program_base; + + u32 read_lowest = 0; + u32 read_highest = 0; + + std::vector code; + u32 cached_lowest = std::numeric_limits::max(); + u32 cached_highest = 0; +}; } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -70,35 +134,91 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader() = default; - -Shader::~Shader() = default; - PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_) - : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, + : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ update_descriptor_queue_} {} PipelineCache::~PipelineCache() = default; -ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { +ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); - auto& entry = pair->second; - if (!is_cache_miss) { - return *entry; + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + qmd.program_start}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + return nullptr; + } + ShaderInfo* const shader{TryGet(*cpu_shader_addr)}; + if (!shader) { + return CreateComputePipelineWithoutShader(*cpu_shader_addr); + } + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)}; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return &pipeline; + } + pipeline = CreateComputePipeline(shader); + shader->compute_users.push_back(key); + return &pipeline; +} + +ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + Environment env{kepler_compute, gpu_memory, program_base}; + if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { + // TODO: Load from cache } - LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - throw "Bad"; + const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + shader_info->unique_hash = env.ComputeHash(); + shader_info->size_bytes = env.ShaderSize(); + return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, + BuildShader(device, code)}; } -void PipelineCache::OnShaderRemoval(Shader*) {} +ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) { + ShaderInfo shader; + ComputePipeline pipeline{CreateComputePipeline(&shader)}; + const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)}; + shader.compute_users.push_back(key); + pipeline.AddRef(); + + const size_t size_bytes{shader.size_bytes}; + Register(std::make_unique(std::move(shader)), shader_cpu_addr, size_bytes); + return &compute_cache.emplace(key, std::move(pipeline)).first->second; +} + +ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const { + const auto& qmd{kepler_compute.launch_description}; + return { + .unique_hash = unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; +} + +void PipelineCache::OnShaderRemoval(ShaderInfo* shader) { + for (const ComputePipelineCacheKey& key : shader->compute_users) { + const auto it = compute_cache.find(key); + ASSERT(it != compute_cache.end()); + + Pipeline& pipeline = it->second; + if (pipeline.RemoveRef()) { + // Wait for the pipeline to be free of GPU usage before destroying it + scheduler.Wait(pipeline.UsageTick()); + compute_cache.erase(it); + } + } +} } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e3e63340d..eb35abc27 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -36,7 +36,7 @@ class VKUpdateDescriptorQueue; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - GPUVAddr shader; + u128 unique_hash; u32 shared_memory_size; std::array workgroup_size; @@ -67,13 +67,13 @@ struct hash { namespace Vulkan { -class Shader { -public: - explicit Shader(); - ~Shader(); +struct ShaderInfo { + u128 unique_hash{}; + size_t size_bytes{}; + std::vector compute_users; }; -class PipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache final : public VideoCommon::ShaderCache { public: explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, @@ -83,12 +83,18 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~PipelineCache() override; - ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); protected: - void OnShaderRemoval(Shader* shader) final; + void OnShaderRemoval(ShaderInfo* shader) override; private: + ComputePipeline CreateComputePipeline(ShaderInfo* shader); + + ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr); + + ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const; + Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -99,13 +105,7 @@ private: VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; - std::unique_ptr null_shader; - std::unique_ptr null_kernel; - - std::array last_shaders{}; - - std::mutex pipeline_cache; - std::unordered_map> compute_cache; + std::unordered_map compute_cache; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f152297d9..b757454c4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,6 +36,8 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#pragma optimize("", off) + namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -237,7 +239,26 @@ void RasterizerVulkan::Clear() { } void RasterizerVulkan::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; + if (!pipeline) { + return; + } + std::scoped_lock lock{buffer_cache.mutex}; + update_descriptor_queue.Acquire(); + pipeline->ConfigureBufferCache(buffer_cache); + const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; + + const auto& qmd{kepler_compute.launch_description}; + const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; + const VkPipeline pipeline_handle{pipeline->Handle()}; + const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()}; + scheduler.Record( + [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, + descriptor_set, nullptr); + cmdbuf.Dispatch(dim[0], dim[1], dim[2]); + }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 31017dc2b..3fd03b915 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -21,7 +21,6 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" -#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -150,8 +149,6 @@ private: BlitImageHelper blit_image; ASTCDecoderPass astc_decoder_pass; - GraphicsPipelineCacheKey graphics_key; - TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index a8bf7bda8..2dd514968 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -10,18 +10,16 @@ namespace Vulkan { ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) - : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} - -ResourcePool::~ResourcePool() = default; + : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {} size_t ResourcePool::CommitResource() { // Refresh semaphore to query updated results - master_semaphore.Refresh(); - const u64 gpu_tick = master_semaphore.KnownGpuTick(); + master_semaphore->Refresh(); + const u64 gpu_tick = master_semaphore->KnownGpuTick(); const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional { for (size_t iterator = begin; iterator < end; ++iterator) { if (gpu_tick >= ticks[iterator]) { - ticks[iterator] = master_semaphore.CurrentTick(); + ticks[iterator] = master_semaphore->CurrentTick(); return iterator; } } @@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() { // Both searches failed, the pool is full; handle it. const size_t free_resource = ManageOverflow(); - ticks[free_resource] = master_semaphore.CurrentTick(); + ticks[free_resource] = master_semaphore->CurrentTick(); found = free_resource; } } diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 9d0bb3b4d..f0b80ad59 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -18,8 +18,16 @@ class MasterSemaphore; */ class ResourcePool { public: + explicit ResourcePool() = default; explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); - virtual ~ResourcePool(); + + virtual ~ResourcePool() = default; + + ResourcePool& operator=(ResourcePool&&) noexcept = default; + ResourcePool(ResourcePool&&) noexcept = default; + + ResourcePool& operator=(const ResourcePool&) = default; + ResourcePool(const ResourcePool&) = default; protected: size_t CommitResource(); @@ -34,7 +42,7 @@ private: /// Allocates a new page of resources. void Grow(); - MasterSemaphore& master_semaphore; + MasterSemaphore* master_semaphore{}; size_t grow_step = 0; ///< Number of new resources created after an overflow size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found std::vector ticks; ///< Ticks for each resource -- cgit v1.2.3 From 6db69990da9f232e6d982cdcb69c2e27d93075cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 19 Feb 2021 18:10:18 -0300 Subject: spirv: Add lower fp16 to fp32 pass --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_context.cpp | 29 ++++-- src/shader_recompiler/backend/spirv/emit_context.h | 6 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2 + src/shader_recompiler/backend/spirv/emit_spirv.h | 103 +++++++++++---------- .../spirv/emit_spirv_bitwise_conversion.cpp | 28 ++++-- .../backend/spirv/emit_spirv_composite.cpp | 48 +++++----- .../backend/spirv/emit_spirv_control_flow.cpp | 2 +- .../backend/spirv/emit_spirv_convert.cpp | 89 ++++++++++++++++++ .../backend/spirv/emit_spirv_floating_point.cpp | 48 +++++----- .../backend/spirv/emit_spirv_integer.cpp | 16 ---- .../backend/spirv/emit_spirv_logical.cpp | 72 +------------- .../backend/spirv/emit_spirv_memory.cpp | 22 +++-- src/shader_recompiler/frontend/ir/condition.h | 2 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 70 +++++++------- .../frontend/ir/microinstruction.cpp | 4 + .../frontend/ir/microinstruction.h | 2 + src/shader_recompiler/frontend/ir/opcodes.inc | 6 +- src/shader_recompiler/frontend/ir/program.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 + .../impl/floating_point_conversion_integer.cpp | 62 ++++++++++--- .../frontend/maxwell/translate/impl/impl.h | 2 +- .../ir_opt/collect_shader_info_pass.cpp | 18 +++- .../ir_opt/constant_propagation_pass.cpp | 12 +-- .../ir_opt/lower_fp16_to_fp32.cpp | 79 ++++++++++++++++ src/shader_recompiler/ir_opt/passes.h | 1 + src/shader_recompiler/main.cpp | 10 +- src/shader_recompiler/object_pool.h | 2 +- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 3 + .../renderer_vulkan/vk_pipeline_cache.cpp | 8 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 - src/video_core/vulkan_common/vulkan_device.cpp | 10 +- 32 files changed, 479 insertions(+), 285 deletions(-) create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp create mode 100644 src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b56bdd3d9..6047f3ebe 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_composite.cpp backend/spirv/emit_spirv_context_get_set.cpp backend/spirv/emit_spirv_control_flow.cpp + backend/spirv/emit_spirv_convert.cpp backend/spirv/emit_spirv_floating_point.cpp backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp @@ -82,6 +83,7 @@ add_library(shader_recompiler STATIC ir_opt/dead_code_elimination_pass.cpp ir_opt/global_memory_to_storage_buffer_pass.cpp ir_opt/identity_removal_pass.cpp + ir_opt/lower_fp16_to_fp32.cpp ir_opt/passes.h ir_opt/ssa_rewrite_pass.cpp ir_opt/verification_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 770067d98..ea1c8a3be 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -30,8 +30,11 @@ EmitContext::EmitContext(IR::Program& program) : Sirit::Module(0x00010000) { DefineCommonTypes(program.info); DefineCommonConstants(); DefineSpecialVariables(program.info); - DefineConstantBuffers(program.info); - DefineStorageBuffers(program.info); + + u32 binding{}; + DefineConstantBuffers(program.info, binding); + DefineStorageBuffers(program.info, binding); + DefineLabels(program); } @@ -58,6 +61,12 @@ void EmitContext::DefineCommonTypes(const Info& info) { U1 = Name(TypeBool(), "u1"); + // TODO: Conditionally define these + AddCapability(spv::Capability::Int16); + AddCapability(spv::Capability::Int64); + U16 = Name(TypeInt(16, false), "u16"); + U64 = Name(TypeInt(64, false), "u64"); + F32.Define(*this, TypeFloat(32), "f32"); U32.Define(*this, TypeInt(32, false), "u32"); @@ -95,12 +104,12 @@ void EmitContext::DefineSpecialVariables(const Info& info) { } } -void EmitContext::DefineConstantBuffers(const Info& info) { +void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { if (info.constant_buffer_descriptors.empty()) { return; } const Id array_type{TypeArray(U32[1], Constant(U32[1], 4096))}; - Decorate(array_type, spv::Decoration::ArrayStride, 16U); + Decorate(array_type, spv::Decoration::ArrayStride, 4U); const Id struct_type{TypeStruct(array_type)}; Name(struct_type, "cbuf_block"); @@ -111,18 +120,19 @@ void EmitContext::DefineConstantBuffers(const Info& info) { const Id uniform_type{TypePointer(spv::StorageClass::Uniform, struct_type)}; uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]); - u32 binding{}; + u32 index{}; for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); Name(id, fmt::format("c{}", desc.index)); std::fill_n(cbufs.data() + desc.index, desc.count, id); + index += desc.count; binding += desc.count; } } -void EmitContext::DefineStorageBuffers(const Info& info) { +void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { if (info.storage_buffers_descriptors.empty()) { return; } @@ -140,13 +150,14 @@ void EmitContext::DefineStorageBuffers(const Info& info) { const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); - u32 binding{}; + u32 index{}; for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; Decorate(id, spv::Decoration::Binding, binding); Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("ssbo{}", binding)); - std::fill_n(ssbos.data() + binding, desc.count, id); + Name(id, fmt::format("ssbo{}", index)); + std::fill_n(ssbos.data() + index, desc.count, id); + index += desc.count; binding += desc.count; } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index c4b84759d..8de203da2 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -37,6 +37,8 @@ public: Id void_id{}; Id U1{}; + Id U16{}; + Id U64{}; VectorTypes F32; VectorTypes U32; VectorTypes F16; @@ -59,8 +61,8 @@ private: void DefineCommonTypes(const Info& info); void DefineCommonConstants(); void DefineSpecialVariables(const Info& info); - void DefineConstantBuffers(const Info& info); - void DefineStorageBuffers(const Info& info); + void DefineConstantBuffers(const Info& info, u32& binding); + void DefineStorageBuffers(const Info& info, u32& binding); void DefineLabels(IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index d59718435..4ce07c281 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -14,6 +14,8 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" +#pragma optimize("", off) + namespace Shader::Backend::SPIRV { namespace { template diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 5813f51ff..2b59c0b72 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -79,26 +79,27 @@ void EmitWriteStorageU16(EmitContext& ctx); void EmitWriteStorageS16(EmitContext& ctx); void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); -void EmitWriteStorage64(EmitContext& ctx); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); void EmitWriteStorage128(EmitContext& ctx); -void EmitCompositeConstructU32x2(EmitContext& ctx); -void EmitCompositeConstructU32x3(EmitContext& ctx); -void EmitCompositeConstructU32x4(EmitContext& ctx); -void EmitCompositeExtractU32x2(EmitContext& ctx); -Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index); -void EmitCompositeExtractU32x4(EmitContext& ctx); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); void EmitCompositeConstructF16x2(EmitContext& ctx); void EmitCompositeConstructF16x3(EmitContext& ctx); void EmitCompositeConstructF16x4(EmitContext& ctx); -void EmitCompositeExtractF16x2(EmitContext& ctx); -void EmitCompositeExtractF16x3(EmitContext& ctx); -void EmitCompositeExtractF16x4(EmitContext& ctx); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); void EmitCompositeConstructF32x2(EmitContext& ctx); void EmitCompositeConstructF32x3(EmitContext& ctx); void EmitCompositeConstructF32x4(EmitContext& ctx); -void EmitCompositeExtractF32x2(EmitContext& ctx); -void EmitCompositeExtractF32x3(EmitContext& ctx); -void EmitCompositeExtractF32x4(EmitContext& ctx); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); @@ -116,11 +117,13 @@ void EmitBitCastF16U16(EmitContext& ctx); Id EmitBitCastF32U32(EmitContext& ctx, Id value); void EmitBitCastF64U64(EmitContext& ctx); void EmitPackUint2x32(EmitContext& ctx); -void EmitUnpackUint2x32(EmitContext& ctx); -void EmitPackFloat2x16(EmitContext& ctx); -void EmitUnpackFloat2x16(EmitContext& ctx); -void EmitPackDouble2x32(EmitContext& ctx); -void EmitUnpackDouble2x32(EmitContext& ctx); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value); +Id EmitPackFloat2x16(EmitContext& ctx, Id value); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); +Id EmitPackHalf2x16(EmitContext& ctx, Id value); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); @@ -159,18 +162,18 @@ void EmitFPLog2(EmitContext& ctx); void EmitFPSaturate16(EmitContext& ctx); void EmitFPSaturate32(EmitContext& ctx); void EmitFPSaturate64(EmitContext& ctx); -void EmitFPRoundEven16(EmitContext& ctx); -void EmitFPRoundEven32(EmitContext& ctx); -void EmitFPRoundEven64(EmitContext& ctx); -void EmitFPFloor16(EmitContext& ctx); -void EmitFPFloor32(EmitContext& ctx); -void EmitFPFloor64(EmitContext& ctx); -void EmitFPCeil16(EmitContext& ctx); -void EmitFPCeil32(EmitContext& ctx); -void EmitFPCeil64(EmitContext& ctx); -void EmitFPTrunc16(EmitContext& ctx); -void EmitFPTrunc32(EmitContext& ctx); -void EmitFPTrunc64(EmitContext& ctx); +Id EmitFPRoundEven16(EmitContext& ctx, Id value); +Id EmitFPRoundEven32(EmitContext& ctx, Id value); +Id EmitFPRoundEven64(EmitContext& ctx, Id value); +Id EmitFPFloor16(EmitContext& ctx, Id value); +Id EmitFPFloor32(EmitContext& ctx, Id value); +Id EmitFPFloor64(EmitContext& ctx, Id value); +Id EmitFPCeil16(EmitContext& ctx, Id value); +Id EmitFPCeil32(EmitContext& ctx, Id value); +Id EmitFPCeil64(EmitContext& ctx, Id value); +Id EmitFPTrunc16(EmitContext& ctx, Id value); +Id EmitFPTrunc32(EmitContext& ctx, Id value); +Id EmitFPTrunc64(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); void EmitIAdd64(EmitContext& ctx); Id EmitISub32(EmitContext& ctx, Id a, Id b); @@ -201,25 +204,25 @@ void EmitLogicalOr(EmitContext& ctx); void EmitLogicalAnd(EmitContext& ctx); void EmitLogicalXor(EmitContext& ctx); void EmitLogicalNot(EmitContext& ctx); -void EmitConvertS16F16(EmitContext& ctx); -void EmitConvertS16F32(EmitContext& ctx); -void EmitConvertS16F64(EmitContext& ctx); -void EmitConvertS32F16(EmitContext& ctx); -void EmitConvertS32F32(EmitContext& ctx); -void EmitConvertS32F64(EmitContext& ctx); -void EmitConvertS64F16(EmitContext& ctx); -void EmitConvertS64F32(EmitContext& ctx); -void EmitConvertS64F64(EmitContext& ctx); -void EmitConvertU16F16(EmitContext& ctx); -void EmitConvertU16F32(EmitContext& ctx); -void EmitConvertU16F64(EmitContext& ctx); -void EmitConvertU32F16(EmitContext& ctx); -void EmitConvertU32F32(EmitContext& ctx); -void EmitConvertU32F64(EmitContext& ctx); -void EmitConvertU64F16(EmitContext& ctx); -void EmitConvertU64F32(EmitContext& ctx); -void EmitConvertU64F64(EmitContext& ctx); -void EmitConvertU64U32(EmitContext& ctx); -void EmitConvertU32U64(EmitContext& ctx); +Id EmitConvertS16F16(EmitContext& ctx, Id value); +Id EmitConvertS16F32(EmitContext& ctx, Id value); +Id EmitConvertS16F64(EmitContext& ctx, Id value); +Id EmitConvertS32F16(EmitContext& ctx, Id value); +Id EmitConvertS32F32(EmitContext& ctx, Id value); +Id EmitConvertS32F64(EmitContext& ctx, Id value); +Id EmitConvertS64F16(EmitContext& ctx, Id value); +Id EmitConvertS64F32(EmitContext& ctx, Id value); +Id EmitConvertS64F64(EmitContext& ctx, Id value); +Id EmitConvertU16F16(EmitContext& ctx, Id value); +Id EmitConvertU16F32(EmitContext& ctx, Id value); +Id EmitConvertU16F64(EmitContext& ctx, Id value); +Id EmitConvertU32F16(EmitContext& ctx, Id value); +Id EmitConvertU32F32(EmitContext& ctx, Id value); +Id EmitConvertU32F64(EmitContext& ctx, Id value); +Id EmitConvertU64F16(EmitContext& ctx, Id value); +Id EmitConvertU64F32(EmitContext& ctx, Id value); +Id EmitConvertU64F64(EmitContext& ctx, Id value); +Id EmitConvertU64U32(EmitContext& ctx, Id value); +Id EmitConvertU32U64(EmitContext& ctx, Id value); } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 49c200498..e0d1ba413 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -34,24 +34,32 @@ void EmitPackUint2x32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitUnpackUint2x32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); } -void EmitPackFloat2x16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitPackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[1], value); } -void EmitUnpackFloat2x16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F16[2], value); } -void EmitPackDouble2x32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitPackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpPackHalf2x16(ctx.U32[1], value); } -void EmitUnpackDouble2x32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) { + return ctx.OpUnpackHalf2x16(ctx.F32[2], value); +} + +Id EmitPackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.F64[1], value); +} + +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) { + return ctx.OpBitcast(ctx.U32[2], value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 348e4796d..c950854a0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -6,28 +6,28 @@ namespace Shader::Backend::SPIRV { -void EmitCompositeConstructU32x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { + return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); } -void EmitCompositeConstructU32x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { + return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); } -void EmitCompositeConstructU32x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { + return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); } -void EmitCompositeExtractU32x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); } -Id EmitCompositeExtractU32x3(EmitContext& ctx, Id vector, u32 index) { - return ctx.OpCompositeExtract(ctx.U32[1], vector, index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); } -void EmitCompositeExtractU32x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.U32[1], composite, index); } void EmitCompositeConstructF16x2(EmitContext&) { @@ -42,16 +42,16 @@ void EmitCompositeConstructF16x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitCompositeExtractF16x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); } -void EmitCompositeExtractF16x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); } -void EmitCompositeExtractF16x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); } void EmitCompositeConstructF32x2(EmitContext&) { @@ -66,16 +66,16 @@ void EmitCompositeConstructF32x4(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitCompositeExtractF32x2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); } -void EmitCompositeExtractF32x3(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); } -void EmitCompositeExtractF32x4(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); } void EmitCompositeConstructF64x2(EmitContext&) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index 6c4199664..48755b827 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -11,7 +11,7 @@ void EmitBranch(EmitContext& ctx, IR::Block* label) { } void EmitBranchConditional(EmitContext& ctx, Id condition, IR::Block* true_label, - IR::Block* false_label) { + IR::Block* false_label) { ctx.OpBranchConditional(condition, true_label->Definition(), false_label->Definition()); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp new file mode 100644 index 000000000..76ccaffce --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -0,0 +1,89 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/spirv/emit_spirv.h" + +namespace Shader::Backend::SPIRV { + +Id EmitConvertS16F16(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS16F32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS16F64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); +} + +Id EmitConvertS32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U32[1], value); +} + +Id EmitConvertS64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertS64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToS(ctx.U64, value); +} + +Id EmitConvertU16F16(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU16F32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU16F64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); +} + +Id EmitConvertU32F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU32F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U32[1], value); +} + +Id EmitConvertU64F16(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F32(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64F64(EmitContext& ctx, Id value) { + return ctx.OpConvertFToU(ctx.U64, value); +} + +Id EmitConvertU64U32(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U64, value); +} + +Id EmitConvertU32U64(EmitContext& ctx, Id value) { + return ctx.OpUConvert(ctx.U32[1], value); +} + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index d24fbb353..9ef180531 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -169,52 +169,52 @@ void EmitFPSaturate64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitFPRoundEven16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRoundEven16(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F16[1], value); } -void EmitFPRoundEven32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRoundEven32(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F32[1], value); } -void EmitFPRoundEven64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRoundEven64(EmitContext& ctx, Id value) { + return ctx.OpRoundEven(ctx.F64[1], value); } -void EmitFPFloor16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPFloor16(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F16[1], value); } -void EmitFPFloor32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPFloor32(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F32[1], value); } -void EmitFPFloor64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPFloor64(EmitContext& ctx, Id value) { + return ctx.OpFloor(ctx.F64[1], value); } -void EmitFPCeil16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPCeil16(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F16[1], value); } -void EmitFPCeil32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPCeil32(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F32[1], value); } -void EmitFPCeil64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPCeil64(EmitContext& ctx, Id value) { + return ctx.OpCeil(ctx.F64[1], value); } -void EmitFPTrunc16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPTrunc16(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F16[1], value); } -void EmitFPTrunc32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPTrunc32(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F32[1], value); } -void EmitFPTrunc64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPTrunc64(EmitContext& ctx, Id value) { + return ctx.OpTrunc(ctx.F64[1], value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index a1d16b81e..22117a4ee 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -113,20 +113,4 @@ Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); } -void EmitLogicalOr(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitLogicalAnd(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitLogicalXor(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitLogicalNot(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index ff2f4fb74..c5a07252f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -6,83 +6,19 @@ namespace Shader::Backend::SPIRV { -void EmitConvertS16F16(EmitContext&) { +void EmitLogicalOr(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitConvertS16F32(EmitContext&) { +void EmitLogicalAnd(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitConvertS16F64(EmitContext&) { +void EmitLogicalXor(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitConvertS32F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS32F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS32F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS64F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS64F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertS64F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU16F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU16F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU16F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64F16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64F32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64F64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU64U32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitConvertU32U64(EmitContext&) { +void EmitLogicalNot(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 77d698ffd..808c1b401 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -94,8 +94,7 @@ void EmitLoadStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset) { +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } @@ -129,8 +128,8 @@ void EmitWriteStorageS16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, - const IR::Value& offset, Id value) { +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { if (!binding.IsImmediate()) { throw NotImplementedException("Dynamic storage buffer indexing"); } @@ -140,8 +139,19 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ctx.OpStore(pointer, value); } -void EmitWriteStorage64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + if (!binding.IsImmediate()) { + throw NotImplementedException("Dynamic storage buffer indexing"); + } + // TODO: Support reinterpreting bindings, guaranteed to be aligned + const Id ssbo{ctx.ssbos[binding.U32()]}; + const Id low_index{StorageIndex(ctx, offset, sizeof(u32))}; + const Id high_index{ctx.OpIAdd(ctx.U32[1], low_index, ctx.Constant(ctx.U32[1], 1U))}; + const Id low_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, low_index)}; + const Id high_pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, high_index)}; + ctx.OpStore(low_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); + ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); } void EmitWriteStorage128(EmitContext&) { diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h index 16b4ae888..51c2f15cf 100644 --- a/src/shader_recompiler/frontend/ir/condition.h +++ b/src/shader_recompiler/frontend/ir/condition.h @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index f42489d41..559ab9cca 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -547,11 +547,11 @@ F32 IREmitter::FPSqrt(const F32& value) { F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPSaturate16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPSaturate32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPSaturate64, value); default: ThrowInvalidType(value.Type()); @@ -560,11 +560,11 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPRoundEven16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPRoundEven32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPRoundEven64, value); default: ThrowInvalidType(value.Type()); @@ -573,11 +573,11 @@ F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPFloor16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPFloor32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPFloor64, value); default: ThrowInvalidType(value.Type()); @@ -586,11 +586,11 @@ F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPCeil16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPCeil32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPCeil64, value); default: ThrowInvalidType(value.Type()); @@ -599,11 +599,11 @@ F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPTrunc16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPTrunc32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPTrunc64, value); default: ThrowInvalidType(value.Type()); @@ -729,33 +729,33 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) { switch (bitsize) { case 16: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS16F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS16F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS16F64, value); default: ThrowInvalidType(value.Type()); } case 32: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS32F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS32F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS32F64, value); default: ThrowInvalidType(value.Type()); } case 64: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertS64F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertS64F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertS64F64, value); default: ThrowInvalidType(value.Type()); @@ -769,33 +769,33 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { switch (bitsize) { case 16: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU16F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU16F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU16F64, value); default: ThrowInvalidType(value.Type()); } case 32: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU32F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU32F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU32F64, value); default: ThrowInvalidType(value.Type()); } case 64: switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::ConvertU64F16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::ConvertU64F32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::ConvertU64F64, value); default: ThrowInvalidType(value.Type()); @@ -829,10 +829,10 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { case 64: switch (value.Type()) { case Type::U32: + return Inst(Opcode::ConvertU64U32, value); + case Type::U64: // Nothing to do return value; - case Type::U64: - return Inst(Opcode::ConvertU64U32, value); default: break; } diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index ee76db9ad..d6a9be87d 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -216,6 +216,10 @@ void Inst::ReplaceUsesWith(Value replacement) { } } +void Inst::ReplaceOpcode(IR::Opcode opcode) { + op = opcode; +} + void Inst::Use(const Value& value) { Inst* const inst{value.Inst()}; ++inst->use_count; diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 5b244fa0b..321393dd7 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h @@ -86,6 +86,8 @@ public: void ReplaceUsesWith(Value replacement); + void ReplaceOpcode(IR::Opcode opcode); + template requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v) [[nodiscard]] FlagsType Flags() const noexcept { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index ede5e20c2..50da77535 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -119,8 +119,10 @@ OPCODE(PackUint2x32, U64, U32x OPCODE(UnpackUint2x32, U32x2, U64, ) OPCODE(PackFloat2x16, U32, F16x2, ) OPCODE(UnpackFloat2x16, F16x2, U32, ) -OPCODE(PackDouble2x32, U64, U32x2, ) -OPCODE(UnpackDouble2x32, U32x2, U64, ) +OPCODE(PackHalf2x16, U32, F32x2, ) +OPCODE(UnpackHalf2x16, F32x2, U32, ) +OPCODE(PackDouble2x32, F64, U32x2, ) +OPCODE(UnpackDouble2x32, U32x2, F64, ) // Pseudo-operation, handled specially at final emit OPCODE(GetZeroFromOp, U1, Opaque, ) diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp index 0ce99ef2a..8c301c3a1 100644 --- a/src/shader_recompiler/frontend/ir/program.cpp +++ b/src/shader_recompiler/frontend/ir/program.cpp @@ -35,4 +35,4 @@ std::string DumpProgram(const Program& program) { return ret; } -} // namespace Shader::IR \ No newline at end of file +} // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 8c44ebb29..16cdc12e2 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,6 +56,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool dest_format; BitField<10, 2, SrcFormat> src_format; BitField<12, 1, u64> is_signed; - BitField<39, 1, Rounding> rounding; + BitField<39, 2, Rounding> rounding; BitField<49, 1, u64> half; BitField<44, 1, u64> ftz; BitField<45, 1, u64> abs; @@ -55,6 +55,28 @@ size_t BitSize(DestFormat dest_format) { } } +IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { + union { + u64 raw; + BitField<20, 14, s64> offset; + BitField<34, 5, u64> binding; + } const cbuf{insn}; + if (cbuf.binding >= 18) { + throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); + } + if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) { + throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4); + } + if (cbuf.offset % 2 != 0) { + throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4); + } + const IR::U32 binding{v.ir.Imm32(static_cast(cbuf.binding))}; + const IR::U32 byte_offset{v.ir.Imm32(static_cast(cbuf.offset) * 4 + 4)}; + const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)}; + const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)}; + return v.ir.PackDouble2x32(vector); +} + void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; @@ -82,19 +104,16 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { const size_t bitsize{BitSize(f2i.dest_format)}; const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)}; - v.X(f2i.dest_reg, result); + if (bitsize == 64) { + const IR::Value vector{v.ir.UnpackUint2x32(result)}; + v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)}); + v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)}); + } else { + v.X(f2i.dest_reg, result); + } if (f2i.cc != 0) { - v.SetZFlag(v.ir.GetZeroFromOp(result)); - if (is_signed) { - v.SetSFlag(v.ir.GetSignFromOp(result)); - } else { - v.ResetSFlag(); - } - v.ResetCFlag(); - - // TODO: Investigate if out of bound conversions sets the overflow flag - v.ResetOFlag(); + throw NotImplementedException("F2I CC"); } } } // Anonymous namespace @@ -118,12 +137,25 @@ void TranslatorVisitor::F2I_reg(u64 insn) { f2i.base.src_format.Value()); } }()}; - TranslateF2I(*this, insn, op_a); } -void TranslatorVisitor::F2I_cbuf(u64) { - throw NotImplementedException("{}", Opcode::F2I_cbuf); +void TranslatorVisitor::F2I_cbuf(u64 insn) { + const F2I f2i{insn}; + const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { + switch (f2i.src_format) { + case SrcFormat::F16: + return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; + case SrcFormat::F32: + return GetCbufF(insn); + case SrcFormat::F64: { + return UnpackCbuf(*this, insn); + } + default: + throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); + } + }()}; + TranslateF2I(*this, insn, op_a); } void TranslatorVisitor::F2I_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 8bd468244..27aba2cf8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -11,7 +11,7 @@ namespace Shader::Maxwell { class TranslatorVisitor { public: - explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_} ,ir(block) {} + explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} Environment& env; IR::IREmitter ir; diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f2326dea1..f7f102f53 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -26,6 +26,22 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::LocalInvocationId: info.uses_local_invocation_id = true; break; + case IR::Opcode::CompositeConstructF16x2: + case IR::Opcode::CompositeConstructF16x3: + case IR::Opcode::CompositeConstructF16x4: + case IR::Opcode::CompositeExtractF16x2: + case IR::Opcode::CompositeExtractF16x3: + case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::BitCastU16F16: + case IR::Opcode::BitCastF16U16: + case IR::Opcode::PackFloat2x16: + case IR::Opcode::UnpackFloat2x16: + case IR::Opcode::ConvertS16F16: + case IR::Opcode::ConvertS32F16: + case IR::Opcode::ConvertS64F16: + case IR::Opcode::ConvertU16F16: + case IR::Opcode::ConvertU32F16: + case IR::Opcode::ConvertU64F16: case IR::Opcode::FPAbs16: case IR::Opcode::FPAdd16: case IR::Opcode::FPCeil16: @@ -36,7 +52,7 @@ void Visit(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven16: case IR::Opcode::FPSaturate16: case IR::Opcode::FPTrunc16: - info.uses_fp16; + info.uses_fp16 = true; break; case IR::Opcode::FPAbs64: case IR::Opcode::FPAdd64: diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 9eb61b54c..4d4e88259 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -104,12 +104,12 @@ void FoldGetPred(IR::Inst& inst) { bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { /* * We are looking for this pattern: - * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 (uses: 1) - * %rhs_mul = IMul32 %rhs_bfe, %factor_b (uses: 1) - * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 (uses: 1) - * %rhs_mul = IMul32 %lhs_bfe, %factor_b (uses: 1) - * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 (uses: 1) - * %result = IAdd32 %lhs_shl, %rhs_mul (uses: 10) + * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 + * %rhs_mul = IMul32 %rhs_bfe, %factor_b + * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 + * %rhs_mul = IMul32 %lhs_bfe, %factor_b + * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 + * %result = IAdd32 %lhs_shl, %rhs_mul * * And replacing it with * %result = IMul32 %factor_a, %factor_b diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp new file mode 100644 index 000000000..c7032f168 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -0,0 +1,79 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/frontend/ir/ir_emitter.h" +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Optimization { +namespace { +IR::Opcode Replace(IR::Opcode op) { + switch (op) { + case IR::Opcode::FPAbs16: + return IR::Opcode::FPAbs32; + case IR::Opcode::FPAdd16: + return IR::Opcode::FPAdd32; + case IR::Opcode::FPCeil16: + return IR::Opcode::FPCeil32; + case IR::Opcode::FPFloor16: + return IR::Opcode::FPFloor32; + case IR::Opcode::FPFma16: + return IR::Opcode::FPFma32; + case IR::Opcode::FPMul16: + return IR::Opcode::FPMul32; + case IR::Opcode::FPNeg16: + return IR::Opcode::FPNeg32; + case IR::Opcode::FPRoundEven16: + return IR::Opcode::FPRoundEven32; + case IR::Opcode::FPSaturate16: + return IR::Opcode::FPSaturate32; + case IR::Opcode::FPTrunc16: + return IR::Opcode::FPTrunc32; + case IR::Opcode::CompositeConstructF16x2: + return IR::Opcode::CompositeConstructF32x2; + case IR::Opcode::CompositeConstructF16x3: + return IR::Opcode::CompositeConstructF32x3; + case IR::Opcode::CompositeConstructF16x4: + return IR::Opcode::CompositeConstructF32x4; + case IR::Opcode::CompositeExtractF16x2: + return IR::Opcode::CompositeExtractF32x2; + case IR::Opcode::CompositeExtractF16x3: + return IR::Opcode::CompositeExtractF32x3; + case IR::Opcode::CompositeExtractF16x4: + return IR::Opcode::CompositeExtractF32x4; + case IR::Opcode::ConvertS16F16: + return IR::Opcode::ConvertS16F32; + case IR::Opcode::ConvertS32F16: + return IR::Opcode::ConvertS32F32; + case IR::Opcode::ConvertS64F16: + return IR::Opcode::ConvertS64F32; + case IR::Opcode::ConvertU16F16: + return IR::Opcode::ConvertU16F32; + case IR::Opcode::ConvertU32F16: + return IR::Opcode::ConvertU32F32; + case IR::Opcode::ConvertU64F16: + return IR::Opcode::ConvertU64F32; + case IR::Opcode::PackFloat2x16: + return IR::Opcode::PackHalf2x16; + case IR::Opcode::UnpackFloat2x16: + return IR::Opcode::UnpackHalf2x16; + default: + return op; + } +} +} // Anonymous namespace + +void LowerFp16ToFp32(IR::Program& program) { + for (IR::Function& function : program.functions) { + for (IR::Block* const block : function.blocks) { + for (IR::Inst& inst : block->Instructions()) { + inst.ReplaceOpcode(Replace(inst.Opcode())); + } + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 89e5811d3..38106308c 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -24,6 +24,7 @@ void ConstantPropagationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block); void GlobalMemoryToStorageBufferPass(IR::Program& program); void IdentityRemovalPass(IR::Function& function); +void LowerFp16ToFp32(IR::Program& program); void SsaRewritePass(std::span post_order_blocks); void VerificationPass(const IR::Function& function); diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index 050a37f18..abd44e323 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -67,8 +67,8 @@ int main() { ObjectPool inst_pool; ObjectPool block_pool; - FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; - // FileEnvironment env{"D:\\Shaders\\shader.bin"}; + // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; + FileEnvironment env{"D:\\Shaders\\shader.bin"}; block_pool.ReleaseContents(); inst_pool.ReleaseContents(); flow_block_pool.ReleaseContents(); @@ -76,5 +76,9 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - void(Backend::SPIRV::EmitSPIRV(env, program)); + const std::vector spirv{Backend::SPIRV::EmitSPIRV(env, program)}; + std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; + std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); + std::fclose(file); + std::system("spirv-dis D:\\shader.spv"); } diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index f78813b5f..c10751b9d 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h @@ -18,7 +18,7 @@ public: } template - requires std::is_constructible_v [[nodiscard]] T* Create(Args&&... args) { + requires std::is_constructible_v[[nodiscard]] T* Create(Args&&... args) { return std::construct_at(Memory(), std::forward(args)...); } diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 4181d83ee..a444d55d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -206,6 +206,8 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .codeSize = static_cast(code.size_bytes()), .pCode = code.data(), }); + /* + FIXME pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, @@ -224,6 +226,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ .basePipelineHandle = nullptr, .basePipelineIndex = 0, }); + */ } VKComputePass::~VKComputePass() = default; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4bf3e4819..c2a41a360 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -31,8 +31,6 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -#pragma optimize("", off) - namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); @@ -180,6 +178,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { // TODO: Load from cache } const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + std::system("spirv-dis D:\\shader.spv"); + shader_info->unique_hash = env.ComputeHash(); shader_info->size_bytes = env.ShaderSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b757454c4..1b662f9f3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,8 +36,6 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" -#pragma optimize("", off) - namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index f214510da..85f903125 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -247,9 +247,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR .shaderStorageImageArrayDynamicIndexing = false, .shaderClipDistance = false, .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, + .shaderFloat64 = true, + .shaderInt64 = true, + .shaderInt16 = true, .shaderResourceResidency = false, .shaderResourceMinLod = false, .sparseBinding = false, @@ -420,8 +420,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - is_float16_supported = false; + // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); + // is_float16_supported = false; } graphics_queue = logical.GetQueue(graphics_family); -- cgit v1.2.3 From e2bc05b17d91854cbb9c0ce3647141bf7d33143e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 20 Feb 2021 03:30:13 -0300 Subject: shader: Add denorm flush support --- src/shader_recompiler/CMakeLists.txt | 5 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 63 +++++++++++++++++-- src/shader_recompiler/backend/spirv/emit_spirv.h | 4 +- .../backend/spirv/emit_spirv_floating_point.cpp | 6 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 32 +++++----- src/shader_recompiler/frontend/ir/ir_emitter.h | 8 +-- src/shader_recompiler/frontend/ir/modifiers.h | 23 ++++--- .../impl/floating_point_conversion_integer.cpp | 19 ++++-- .../ir_opt/collect_shader_info_pass.cpp | 71 ++++++++++++++++++++-- .../global_memory_to_storage_buffer_pass.cpp | 1 - src/shader_recompiler/main.cpp | 13 +++- src/shader_recompiler/profile.h | 9 ++- src/shader_recompiler/recompiler.cpp | 5 +- src/shader_recompiler/recompiler.h | 4 +- src/shader_recompiler/shader_info.h | 7 ++- .../renderer_vulkan/vk_compute_pipeline.cpp | 7 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 15 ++++- src/video_core/vulkan_common/vulkan_device.cpp | 26 +++++--- src/video_core/vulkan_common/vulkan_device.h | 33 +++++----- src/video_core/vulkan_common/vulkan_wrapper.cpp | 2 - 20 files changed, 260 insertions(+), 93 deletions(-) (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6047f3ebe..fbd4ec6dc 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -32,6 +32,7 @@ add_library(shader_recompiler STATIC frontend/ir/ir_emitter.h frontend/ir/microinstruction.cpp frontend/ir/microinstruction.h + frontend/ir/modifiers.h frontend/ir/opcodes.cpp frontend/ir/opcodes.h frontend/ir/opcodes.inc @@ -94,9 +95,7 @@ add_library(shader_recompiler STATIC shader_info.h ) -target_include_directories(shader_recompiler PRIVATE sirit) -target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) -target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit) +target_link_libraries(shader_recompiler PUBLIC fmt::fmt sirit) add_executable(shader_util main.cpp) target_link_libraries(shader_util PRIVATE shader_recompiler) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 4ce07c281..2519e446a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -14,8 +14,6 @@ #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" -#pragma optimize("", off) - namespace Shader::Backend::SPIRV { namespace { template @@ -113,9 +111,61 @@ Id TypeId(const EmitContext& ctx, IR::Type type) { throw NotImplementedException("Phi node type {}", type); } } + +void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, + Id main_func) { + if (!profile.support_float_controls) { + return; + } + const Info& info{program.info}; + if (!info.uses_fp32_denorms_flush && !info.uses_fp32_denorms_preserve && + !info.uses_fp16_denorms_flush && !info.uses_fp16_denorms_preserve) { + return; + } + ctx.AddExtension("SPV_KHR_float_controls"); + + if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { + // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); + } else if (info.uses_fp32_denorms_flush) { + if (profile.support_fp32_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); + } else { + // Drivers will most likely flush denorms by default, no need to warn + } + } else if (info.uses_fp32_denorms_preserve) { + if (profile.support_fp32_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); + } else { + // LOG_WARNING(HW_GPU, "Fp32 denorm preserve used in shader without host support"); + } + } + if (!profile.support_separate_denorm_behavior) { + // No separate denorm behavior + return; + } + if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { + // LOG_ERROR(HW_GPU, "Fp16 denorm flush and preserve on the same shader"); + } else if (info.uses_fp16_denorms_flush) { + if (profile.support_fp16_denorm_flush) { + ctx.AddCapability(spv::Capability::DenormFlushToZero); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + } else { + // Same as fp32, no need to warn as most drivers will flush by default + } + } else if (info.uses_fp32_denorms_preserve) { + if (profile.support_fp16_denorm_preserve) { + ctx.AddCapability(spv::Capability::DenormPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + } else { + // LOG_WARNING(HW_GPU, "Fp16 denorm preserve used in shader without host support"); + } + } +} } // Anonymous namespace -std::vector EmitSPIRV(Environment& env, IR::Program& program) { +std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { EmitContext ctx{program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) @@ -131,10 +181,11 @@ std::vector EmitSPIRV(Environment& env, IR::Program& program) { ctx.OpFunctionEnd(); } boost::container::small_vector interfaces; - if (program.info.uses_workgroup_id) { + const Info& info{program.info}; + if (info.uses_workgroup_id) { interfaces.push_back(ctx.workgroup_id); } - if (program.info.uses_local_invocation_id) { + if (info.uses_local_invocation_id) { interfaces.push_back(ctx.local_invocation_id); } const std::span interfaces_span(interfaces.data(), interfaces.size()); @@ -144,6 +195,8 @@ std::vector EmitSPIRV(Environment& env, IR::Program& program) { ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], workgroup_size[2]); + SetupDenormControl(profile, program, ctx, func); + return ctx.Assemble(); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 2b59c0b72..de624a151 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -11,10 +11,12 @@ #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector EmitSPIRV(Environment& env, IR::Program& program); +[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, Environment& env, + IR::Program& program); // Microinstruction emitters Id EmitPhi(EmitContext& ctx, IR::Inst* inst); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 9ef180531..c9687de37 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -13,7 +13,10 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { ctx.Decorate(op, spv::Decoration::NoContraction); } switch (flags.rounding) { + case IR::FpRounding::DontCare: + break; case IR::FpRounding::RN: + ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE); break; case IR::FpRounding::RM: ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); @@ -25,9 +28,6 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); break; } - if (flags.fmz_mode != IR::FmzMode::FTZ) { - throw NotImplementedException("Denorm management not implemented"); - } return op; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 559ab9cca..8f120a2f6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -558,53 +558,53 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { } } -F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { +F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPRoundEven16, value); + return Inst(Opcode::FPRoundEven16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPRoundEven32, value); + return Inst(Opcode::FPRoundEven32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPRoundEven64, value); + return Inst(Opcode::FPRoundEven64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { +F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPFloor16, value); + return Inst(Opcode::FPFloor16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPFloor32, value); + return Inst(Opcode::FPFloor32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPFloor64, value); + return Inst(Opcode::FPFloor64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { +F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPCeil16, value); + return Inst(Opcode::FPCeil16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPCeil32, value); + return Inst(Opcode::FPCeil32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPCeil64, value); + return Inst(Opcode::FPCeil64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } } -F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { +F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: - return Inst(Opcode::FPTrunc16, value); + return Inst(Opcode::FPTrunc16, Flags{control}, value); case Type::F32: - return Inst(Opcode::FPTrunc32, value); + return Inst(Opcode::FPTrunc32, Flags{control}, value); case Type::F64: - return Inst(Opcode::FPTrunc64, value); + return Inst(Opcode::FPTrunc64, Flags{control}, value); default: ThrowInvalidType(value.Type()); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 24b012a39..959f4f9da 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -129,10 +129,10 @@ public: [[nodiscard]] F32 FPSinNotReduced(const F32& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value); - [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index c288eede0..44652eae7 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h @@ -4,25 +4,30 @@ #pragma once +#include "common/common_types.h" + namespace Shader::IR { enum class FmzMode : u8 { - None, // Denorms are not flushed, NAN is propagated (nouveau) - FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) - FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + DontCare, // Not specified for this instruction + FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) + FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) + None, // Denorms are not flushed, NAN is propagated (nouveau) }; enum class FpRounding : u8 { - RN, // Round to nearest even, - RM, // Round towards negative infinity - RP, // Round towards positive infinity - RZ, // Round towards zero + DontCare, // Not specified for this instruction + RN, // Round to nearest even, + RM, // Round towards negative infinity + RP, // Round towards positive infinity + RZ, // Round towards zero }; struct FpControl { bool no_contraction{false}; - FpRounding rounding{FpRounding::RN}; - FmzMode fmz_mode{FmzMode::FTZ}; + FpRounding rounding{FpRounding::DontCare}; + FmzMode fmz_mode{FmzMode::DontCare}; }; static_assert(sizeof(FpControl) <= sizeof(u32)); + } // namespace Shader::IR diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index ae2d37405..4d82a0009 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -81,17 +81,28 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // F2I is used to convert from a floating point value to an integer const F2I f2i{insn}; + const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && + f2i.dest_format != DestFormat::I64}; + IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; + if (denorm_cares) { + fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; + } + const IR::FpControl fp_control{ + .no_contraction{true}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fmz_mode}, + }; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 rounded_value{[&] { switch (f2i.rounding) { case Rounding::Round: - return v.ir.FPRoundEven(op_a); + return v.ir.FPRoundEven(op_a, fp_control); case Rounding::Floor: - return v.ir.FPFloor(op_a); + return v.ir.FPFloor(op_a, fp_control); case Rounding::Ceil: - return v.ir.FPCeil(op_a); + return v.ir.FPCeil(op_a, fp_control); case Rounding::Trunc: - return v.ir.FPTrunc(op_a); + return v.ir.FPTrunc(op_a, fp_control); default: throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f7f102f53..6662ef4cd 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -2,23 +2,28 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "shader_recompiler/frontend/ir/microinstruction.h" +#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/shader_info.h" namespace Shader::Optimization { namespace { -void AddConstantBufferDescriptor(Info& info, u32 index) { - auto& descriptor{info.constant_buffers.at(index)}; - if (descriptor) { +void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { + if (count != 1) { + throw NotImplementedException("Constant buffer descriptor indexing"); + } + if ((info.constant_buffer_mask & (1U << index)) != 0) { return; } - descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{ + info.constant_buffer_mask |= 1U << index; + info.constant_buffer_descriptors.push_back({ .index{index}, .count{1}, }); } -void Visit(Info& info, IR::Inst& inst) { +void VisitUsages(Info& info, IR::Inst& inst) { switch (inst.Opcode()) { case IR::Opcode::WorkgroupId: info.uses_workgroup_id = true; @@ -72,7 +77,7 @@ void Visit(Info& info, IR::Inst& inst) { break; case IR::Opcode::GetCbuf: if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { - AddConstantBufferDescriptor(info, index.U32()); + AddConstantBufferDescriptor(info, index.U32(), 1); } else { throw NotImplementedException("Constant buffer with non-immediate index"); } @@ -81,6 +86,60 @@ void Visit(Info& info, IR::Inst& inst) { break; } } + +void VisitFpModifiers(Info& info, IR::Inst& inst) { + switch (inst.Opcode()) { + case IR::Opcode::FPAdd16: + case IR::Opcode::FPFma16: + case IR::Opcode::FPMul16: + case IR::Opcode::FPRoundEven16: + case IR::Opcode::FPFloor16: + case IR::Opcode::FPCeil16: + case IR::Opcode::FPTrunc16: { + const auto control{inst.Flags()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp16_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp16_denorms_preserve = true; + break; + } + break; + } + case IR::Opcode::FPAdd32: + case IR::Opcode::FPFma32: + case IR::Opcode::FPMul32: + case IR::Opcode::FPRoundEven32: + case IR::Opcode::FPFloor32: + case IR::Opcode::FPCeil32: + case IR::Opcode::FPTrunc32: { + const auto control{inst.Flags()}; + switch (control.fmz_mode) { + case IR::FmzMode::DontCare: + break; + case IR::FmzMode::FTZ: + case IR::FmzMode::FMZ: + info.uses_fp32_denorms_flush = true; + break; + case IR::FmzMode::None: + info.uses_fp32_denorms_preserve = true; + break; + } + break; + } + default: + break; + } +} + +void Visit(Info& info, IR::Inst& inst) { + VisitUsages(info, inst); + VisitFpModifiers(info, inst); +} } // Anonymous namespace void CollectShaderInfoPass(IR::Program& program) { diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index bf230a850..03bd547b7 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -351,7 +351,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { .cbuf_offset{storage_buffer.offset}, .count{1}, }); - info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); ++storage_index; } for (const StorageInst& storage_inst : to_replace) { diff --git a/src/shader_recompiler/main.cpp b/src/shader_recompiler/main.cpp index abd44e323..72565f477 100644 --- a/src/shader_recompiler/main.cpp +++ b/src/shader_recompiler/main.cpp @@ -60,6 +60,17 @@ void RunDatabase() { fmt::print(stdout, "{} ms", duration_cast(t - t0).count() / double(N)); } +static constexpr Profile PROFILE{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = true, + .support_separate_rounding_mode = true, + .support_fp16_denorm_preserve = true, + .support_fp32_denorm_preserve = true, + .support_fp16_denorm_flush = true, + .support_fp32_denorm_flush = true, +}; + int main() { // RunDatabase(); @@ -76,7 +87,7 @@ int main() { fmt::print(stdout, "{}\n", cfg.Dot()); IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; fmt::print(stdout, "{}\n", IR::DumpProgram(program)); - const std::vector spirv{Backend::SPIRV::EmitSPIRV(env, program)}; + const std::vector spirv{Backend::SPIRV::EmitSPIRV(PROFILE, env, program)}; std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); std::fclose(file); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c96d783b7..9881bebab 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -7,7 +7,14 @@ namespace Shader { struct Profile { - bool unified_descriptor_binding; + bool unified_descriptor_binding{}; + bool support_float_controls{}; + bool support_separate_denorm_behavior{}; + bool support_separate_rounding_mode{}; + bool support_fp16_denorm_preserve{}; + bool support_fp32_denorm_preserve{}; + bool support_fp16_denorm_flush{}; + bool support_fp32_denorm_flush{}; }; } // namespace Shader diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index b25081e39..527e19c27 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -14,14 +14,15 @@ namespace Shader { -std::pair> RecompileSPIRV(Environment& env, u32 start_address) { +std::pair> RecompileSPIRV(const Profile& profile, Environment& env, + u32 start_address) { ObjectPool flow_block_pool; ObjectPool inst_pool; ObjectPool block_pool; Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; - return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; + return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)}; } } // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 4cb973878..2529463ae 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -9,10 +9,12 @@ #include "common/common_types.h" #include "shader_recompiler/environment.h" +#include "shader_recompiler/profile.h" #include "shader_recompiler/shader_info.h" namespace Shader { -[[nodiscard]] std::pair> RecompileSPIRV(Environment& env, u32 start_address); +[[nodiscard]] std::pair> RecompileSPIRV(const Profile& profile, + Environment& env, u32 start_address); } // namespace Shader diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f49a79368..8766bf13e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -31,14 +31,15 @@ struct Info { bool uses_local_invocation_id{}; bool uses_fp16{}; bool uses_fp64{}; + bool uses_fp16_denorms_flush{}; + bool uses_fp16_denorms_preserve{}; + bool uses_fp32_denorms_flush{}; + bool uses_fp32_denorms_preserve{}; u32 constant_buffer_mask{}; - std::array constant_buffers{}; boost::container::static_vector constant_buffer_descriptors; - - std::array storage_buffers{}; boost::container::static_vector storage_buffers_descriptors; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 588ce6139..a658a3276 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -131,12 +131,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip })} {} void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { - u32 enabled_uniforms{}; - for (const auto& desc : info.constant_buffer_descriptors) { - enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; - } - buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); - + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index c2a41a360..49ff911d6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -177,7 +177,20 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { if (const std::optional cached_hash{env.Analyze(qmd.program_start)}) { // TODO: Load from cache } - const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; + const auto& float_control{device.FloatControlProperties()}; + const Shader::Profile profile{ + .unified_descriptor_binding = true, + .support_float_controls = true, + .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == + VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_separate_rounding_mode = + float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, + .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, + .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, + .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, + .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + }; + const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 85f903125..4887d6fd9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -43,6 +43,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -200,6 +201,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CheckSuitability(surface != nullptr); SetupFamilies(surface); SetupFeatures(); + SetupProperties(); const auto queue_cis = GetDeviceQueueCreateInfos(); const std::vector extensions = LoadExtensions(surface != nullptr); @@ -426,8 +428,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); - - use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); } Device::~Device() = default; @@ -600,7 +600,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; - VkPhysicalDeviceFeatures2 features2{}; + VkPhysicalDeviceFeatures2KHR features2{}; features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; features2.pNext = &robustness2; @@ -684,7 +684,7 @@ std::vector Device::LoadExtensions(bool requires_surface) { true); } } - VkPhysicalDeviceFeatures2KHR features; + VkPhysicalDeviceFeatures2KHR features{}; features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; VkPhysicalDeviceProperties2KHR physical_properties; @@ -806,11 +806,21 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { } void Device::SetupFeatures() { - const auto supported_features{physical.GetFeatures()}; - is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; + const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; + is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; + is_shader_storage_image_multisample = features.shaderStorageImageMultisample; is_blit_depth_stencil_supported = TestDepthStencilBlits(); - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); + is_optimal_astc_supported = IsOptimalAstcSupported(features); +} + +void Device::SetupProperties() { + float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2KHR properties2{}; + properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + properties2.pNext = &float_controls; + + physical.GetProperties2KHR(properties2); } void Device::CollectTelemetryParameters() { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 96c0f8c60..82bccc8f0 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -128,6 +128,11 @@ public: return properties.limits.maxComputeSharedMemorySize; } + /// Returns float control properties of the device. + const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { + return float_controls; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -223,11 +228,6 @@ public: return reported_extensions; } - /// Returns true if the setting for async shader compilation is enabled. - bool UseAsynchronousShaders() const { - return use_asynchronous_shaders; - } - u64 GetDeviceLocalMemory() const { return device_access_memory; } @@ -245,6 +245,9 @@ private: /// Sets up device features. void SetupFeatures(); + /// Sets up device properties. + void SetupProperties(); + /// Collects telemetry information from the device. void CollectTelemetryParameters(); @@ -267,14 +270,15 @@ private: bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, FormatType format_type) const; - VkInstance instance; ///< Vulkan instance. - vk::DeviceDispatch dld; ///< Device function pointers. - vk::PhysicalDevice physical; ///< Physical device. - VkPhysicalDeviceProperties properties; ///< Device properties. - vk::Device logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 instance_version{}; ///< Vulkan onstance version. + VkInstance instance; ///< Vulkan instance. + vk::DeviceDispatch dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. + VkPhysicalDeviceProperties properties; ///< Device properties. + VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. + vk::Device logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 instance_version{}; ///< Vulkan onstance version. u32 graphics_family{}; ///< Main graphics queue family index. u32 present_family{}; ///< Main present queue family index. VkDriverIdKHR driver_id{}; ///< Driver ID. @@ -301,9 +305,6 @@ private: bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached - // Asynchronous Graphics Pipeline setting - bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline - // Telemetry parameters std::string vendor_name; ///< Device's driver name. std::vector reported_extensions; ///< Reported Vulkan extensions. diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 2aa0ffbe6..33fb74bfb 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -311,8 +311,6 @@ const char* ToString(VkResult result) noexcept { return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; case VkResult::VK_ERROR_UNKNOWN: return "VK_ERROR_UNKNOWN"; - case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: - return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; case VkResult::VK_THREAD_IDLE_KHR: return "VK_THREAD_IDLE_KHR"; case VkResult::VK_THREAD_DONE_KHR: -- cgit v1.2.3 From 704c6f353f68745168902c6c66c04bb730bd30e6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 17:50:14 -0300 Subject: shader: Rename, implement FADD.SAT and P2R (imm) --- src/shader_recompiler/CMakeLists.txt | 3 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 40 ++++++------ .../backend/spirv/emit_spirv_floating_point.cpp | 58 +++++++---------- .../backend/spirv/emit_spirv_integer.cpp | 75 +++++++++++++++------- .../backend/spirv/emit_spirv_select.cpp | 4 +- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 16 ++--- src/shader_recompiler/frontend/ir/pred.h | 4 +- src/shader_recompiler/frontend/maxwell/program.cpp | 2 +- .../maxwell/translate/impl/floating_point_add.cpp | 20 +++--- .../impl/floating_point_conversion_integer.cpp | 2 +- .../impl/floating_point_fused_multiply_add.cpp | 4 +- .../translate/impl/floating_point_multiply.cpp | 2 +- .../frontend/maxwell/translate/impl/impl.cpp | 17 ++++- .../frontend/maxwell/translate/impl/impl.h | 7 +- .../maxwell/translate/impl/integer_add.cpp | 4 +- .../translate/impl/move_predicate_to_register.cpp | 66 +++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ---- .../renderer_vulkan/vk_pipeline_cache.cpp | 4 +- 18 files changed, 213 insertions(+), 127 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index fbd4ec6dc..802527255 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -74,9 +74,10 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/integer_short_multiply_add.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_memory.cpp - frontend/maxwell/translate/impl/not_implemented.cpp + frontend/maxwell/translate/impl/move_predicate_to_register.cpp frontend/maxwell/translate/impl/move_register.cpp frontend/maxwell/translate/impl/move_special_register.cpp + frontend/maxwell/translate/impl/not_implemented.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h ir_opt/collect_shader_info_pass.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index de624a151..922e294a7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -110,7 +110,7 @@ void EmitCompositeExtractF64x3(EmitContext& ctx); void EmitCompositeExtractF64x4(EmitContext& ctx); void EmitSelect8(EmitContext& ctx); void EmitSelect16(EmitContext& ctx); -void EmitSelect32(EmitContext& ctx); +Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value); void EmitSelect64(EmitContext& ctx); void EmitBitCastU16F16(EmitContext& ctx); Id EmitBitCastU32F32(EmitContext& ctx, Id value); @@ -130,9 +130,9 @@ void EmitGetZeroFromOp(EmitContext& ctx); void EmitGetSignFromOp(EmitContext& ctx); void EmitGetCarryFromOp(EmitContext& ctx); void EmitGetOverflowFromOp(EmitContext& ctx); -void EmitFPAbs16(EmitContext& ctx); -void EmitFPAbs32(EmitContext& ctx); -void EmitFPAbs64(EmitContext& ctx); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value); Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); @@ -146,9 +146,9 @@ void EmitFPMin64(EmitContext& ctx); Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -void EmitFPNeg16(EmitContext& ctx); -void EmitFPNeg32(EmitContext& ctx); -void EmitFPNeg64(EmitContext& ctx); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value); void EmitFPRecip32(EmitContext& ctx); void EmitFPRecip64(EmitContext& ctx); void EmitFPRecipSqrt32(EmitContext& ctx); @@ -161,9 +161,9 @@ void EmitFPExp2NotReduced(EmitContext& ctx); void EmitFPCos(EmitContext& ctx); void EmitFPCosNotReduced(EmitContext& ctx); void EmitFPLog2(EmitContext& ctx); -void EmitFPSaturate16(EmitContext& ctx); -void EmitFPSaturate32(EmitContext& ctx); -void EmitFPSaturate64(EmitContext& ctx); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value); Id EmitFPRoundEven16(EmitContext& ctx, Id value); Id EmitFPRoundEven32(EmitContext& ctx, Id value); Id EmitFPRoundEven64(EmitContext& ctx, Id value); @@ -186,21 +186,21 @@ void EmitIAbs32(EmitContext& ctx); Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); void EmitShiftRightLogical32(EmitContext& ctx); void EmitShiftRightArithmetic32(EmitContext& ctx); -void EmitBitwiseAnd32(EmitContext& ctx); -void EmitBitwiseOr32(EmitContext& ctx); -void EmitBitwiseXor32(EmitContext& ctx); +Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b); +Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b); void EmitBitFieldInsert(EmitContext& ctx); void EmitBitFieldSExtract(EmitContext& ctx); Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); -void EmitULessThan(EmitContext& ctx); -void EmitIEqual(EmitContext& ctx); -void EmitSLessThanEqual(EmitContext& ctx); -void EmitULessThanEqual(EmitContext& ctx); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -void EmitUGreaterThan(EmitContext& ctx); -void EmitINotEqual(EmitContext& ctx); -void EmitSGreaterThanEqual(EmitContext& ctx); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); void EmitLogicalOr(EmitContext& ctx); void EmitLogicalAnd(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index c9687de37..47f87054b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -12,37 +12,21 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { if (flags.no_contraction) { ctx.Decorate(op, spv::Decoration::NoContraction); } - switch (flags.rounding) { - case IR::FpRounding::DontCare: - break; - case IR::FpRounding::RN: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE); - break; - case IR::FpRounding::RM: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); - break; - case IR::FpRounding::RP: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTP); - break; - case IR::FpRounding::RZ: - ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); - break; - } return op; } } // Anonymous namespace -void EmitFPAbs16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs16(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F16[1], value); } -void EmitFPAbs32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs32(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F32[1], value); } -void EmitFPAbs64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPAbs64(EmitContext& ctx, Id value) { + return ctx.OpFAbs(ctx.F64[1], value); } Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { @@ -97,16 +81,16 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); } -void EmitFPNeg16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg16(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F16[1], value); } -void EmitFPNeg32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg32(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F32[1], value); } -void EmitFPNeg64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPNeg64(EmitContext& ctx, Id value) { + return ctx.OpFNegate(ctx.F64[1], value); } void EmitFPRecip32(EmitContext&) { @@ -157,16 +141,22 @@ void EmitFPLog2(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitFPSaturate16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate16(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; + const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; + return ctx.OpFClamp(ctx.F32[1], value, zero, one); } -void EmitFPSaturate32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate32(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; + const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; + return ctx.OpFClamp(ctx.F32[1], value, zero, one); } -void EmitFPSaturate64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSaturate64(EmitContext& ctx, Id value) { + const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; + const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; + return ctx.OpFClamp(ctx.F64[1], value, zero, one); } Id EmitFPRoundEven16(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 22117a4ee..4c0b5990d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -7,10 +7,39 @@ namespace Shader::Backend::SPIRV { Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { - if (inst->HasAssociatedPseudoOperation()) { - throw NotImplementedException("Pseudo-operations on IAdd32"); + Id result{}; + if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { + const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])}; + const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)}; + result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U); + + const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)}; + carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value)); + carry->Invalidate(); + } else { + result = ctx.OpIAdd(ctx.U32[1], a, b); } - return ctx.OpIAdd(ctx.U32[1], a, b); + if (IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}) { + zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); + zero->Invalidate(); + } + if (IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}) { + sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value)); + sign->Invalidate(); + } + if (IR::Inst * overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { + // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c + constexpr u32 s32_max{static_cast(std::numeric_limits::max())}; + const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; + const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Constant(ctx.U32[1], s32_max), a)}; + + const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)}; + const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)}; + const Id carry_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)}; + overflow->SetDefinition(carry_flag); + overflow->Invalidate(); + } + return result; } void EmitIAdd64(EmitContext&) { @@ -49,16 +78,16 @@ void EmitShiftRightArithmetic32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitBitwiseAnd32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseAnd32(EmitContext& ctx, Id a, Id b) { + return ctx.OpBitwiseAnd(ctx.U32[1], a, b); } -void EmitBitwiseOr32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseOr32(EmitContext& ctx, Id a, Id b) { + return ctx.OpBitwiseOr(ctx.U32[1], a, b); } -void EmitBitwiseXor32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitBitwiseXor32(EmitContext& ctx, Id a, Id b) { + return ctx.OpBitwiseXor(ctx.U32[1], a, b); } void EmitBitFieldInsert(EmitContext&) { @@ -77,36 +106,36 @@ Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1, lhs, rhs); } -void EmitULessThan(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThan(ctx.U1, lhs, rhs); } -void EmitIEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpIEqual(ctx.U1, lhs, rhs); } -void EmitSLessThanEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs); } -void EmitULessThanEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThanEqual(ctx.U1, lhs, rhs); } Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); } -void EmitUGreaterThan(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpUGreaterThan(ctx.U1, lhs, rhs); } -void EmitINotEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpINotEqual(ctx.U1, lhs, rhs); } -void EmitSGreaterThanEqual(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs); } Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 8d5062724..eb1926a4d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -14,8 +14,8 @@ void EmitSelect16(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSelect32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); } void EmitSelect64(EmitContext&) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 8f120a2f6..34c2f67fb 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -468,11 +468,11 @@ F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPAbs16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPAbs32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPAbs64, value); default: ThrowInvalidType(value.Type()); @@ -481,11 +481,11 @@ F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { switch (value.Type()) { - case Type::U16: + case Type::F16: return Inst(Opcode::FPNeg16, value); - case Type::U32: + case Type::F32: return Inst(Opcode::FPNeg32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPNeg64, value); default: ThrowInvalidType(value.Type()); @@ -495,10 +495,10 @@ F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { F16F32F64 result{value}; if (abs) { - result = FPAbs(value); + result = FPAbs(result); } if (neg) { - result = FPNeg(value); + result = FPNeg(result); } return result; } diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h index c6f2f82bf..4e7f32423 100644 --- a/src/shader_recompiler/frontend/ir/pred.h +++ b/src/shader_recompiler/frontend/ir/pred.h @@ -19,8 +19,8 @@ enum class Pred : u64 { PT, }; -constexpr size_t NUM_USER_PREDS = 6; -constexpr size_t NUM_PREDS = 7; +constexpr size_t NUM_USER_PREDS = 7; +constexpr size_t NUM_PREDS = 8; [[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept { return static_cast(pred); diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 16cdc12e2..ed5dbf41f 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,12 +56,12 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool src_a; } const fadd{insn}; - if (sat) { - throw NotImplementedException("FADD SAT"); - } if (cc) { throw NotImplementedException("FADD CC"); } @@ -31,7 +27,11 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, }; - v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); + IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fadd.dest_reg, value); } void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { @@ -53,15 +53,15 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetReg20F(insn)); + FADD(*this, insn, GetRegFloat20(insn)); } -void TranslatorVisitor::FADD_cbuf(u64) { - throw NotImplementedException("FADD (cbuf)"); +void TranslatorVisitor::FADD_cbuf(u64 insn) { + FADD(*this, insn, GetFloatCbuf(insn)); } -void TranslatorVisitor::FADD_imm(u64) { - throw NotImplementedException("FADD (imm)"); +void TranslatorVisitor::FADD_imm(u64 insn) { + FADD(*this, insn, GetFloatImm20(insn)); } void TranslatorVisitor::FADD32I(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 4d82a0009..81175627f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -158,7 +158,7 @@ void TranslatorVisitor::F2I_cbuf(u64 insn) { case SrcFormat::F16: return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; case SrcFormat::F32: - return GetCbufF(insn); + return GetFloatCbuf(insn); case SrcFormat::F64: { return UnpackCbuf(*this, insn); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 1464f2807..758700d3c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn)); + FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn)); + FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 1b1d38be7..5c38d3fc1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -91,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetReg20F(insn)); + return FMUL(*this, insn, GetRegFloat20(insn)); } void TranslatorVisitor::FMUL_cbuf(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 079e3497f..be17bb0d9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } -IR::F32 TranslatorVisitor::GetReg20F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) { return ir.BitCast(GetReg20(insn)); } -IR::F32 TranslatorVisitor::GetReg39F(u64 insn) { +IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) { return ir.BitCast(GetReg39(insn)); } @@ -73,7 +73,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { return ir.GetCbuf(binding, byte_offset); } -IR::F32 TranslatorVisitor::GetCbufF(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { return ir.BitCast(GetCbuf(insn)); } @@ -88,6 +88,17 @@ IR::U32 TranslatorVisitor::GetImm20(u64 insn) { return ir.Imm32(value); } +IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { + union { + u64 raw; + BitField<20, 19, u64> value; + BitField<56, 1, u64> is_negative; + } const imm{insn}; + const f32 positive_value{Common::BitCast(static_cast(imm.value) << 12)}; + const f32 value{imm.is_negative != 0 ? -positive_value : positive_value}; + return ir.Imm32(value); +} + IR::U32 TranslatorVisitor::GetImm32(u64 insn) { union { u64 raw; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 27aba2cf8..4d4cf2ebf 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -304,13 +304,14 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); - [[nodiscard]] IR::F32 GetReg20F(u64 insn); - [[nodiscard]] IR::F32 GetReg39F(u64 insn); + [[nodiscard]] IR::F32 GetRegFloat20(u64 insn); + [[nodiscard]] IR::F32 GetRegFloat39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); - [[nodiscard]] IR::F32 GetCbufF(u64 insn); + [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp index 623e78ff8..1493e1815 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp @@ -84,8 +84,8 @@ void TranslatorVisitor::IADD_cbuf(u64 insn) { IADD(*this, insn, GetCbuf(insn)); } -void TranslatorVisitor::IADD_imm(u64) { - throw NotImplementedException("IADD (imm)"); +void TranslatorVisitor::IADD_imm(u64 insn) { + IADD(*this, insn, GetImm20(insn)); } void TranslatorVisitor::IADD32I(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp @@ -0,0 +1,66 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + PR, + CC, +}; +} // Anonymous namespace + +void TranslatorVisitor::P2R_reg(u64) { + throw NotImplementedException("P2R (reg)"); +} + +void TranslatorVisitor::P2R_cbuf(u64) { + throw NotImplementedException("P2R (cbuf)"); +} + +void TranslatorVisitor::P2R_imm(u64 insn) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src; + BitField<40, 1, Mode> mode; + BitField<41, 2, u64> byte_selector; + } const p2r{insn}; + + const u32 mask{GetImm20(insn).U32()}; + const bool pr_mode{p2r.mode == Mode::PR}; + const u32 num_items{pr_mode ? 7U : 4U}; + const u32 offset{static_cast(p2r.byte_selector) * 8}; + IR::U32 insert{ir.Imm32(0)}; + for (u32 index = 0; index < num_items; ++index) { + if (((mask >> index) & 1) == 0) { + continue; + } + const IR::U1 cond{[this, index, pr_mode] { + if (pr_mode) { + return ir.GetPred(IR::Pred{index}); + } + switch (index) { + case 0: + return ir.GetZFlag(); + case 1: + return ir.GetSFlag(); + case 2: + return ir.GetCFlag(); + case 3: + return ir.GetOFlag(); + } + throw LogicError("Unreachable P2R index"); + }()}; + const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; + insert = ir.BitwiseOr(insert, bit); + } + const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; + X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 6b2a1356b..628cf1c14 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -633,18 +633,6 @@ void TranslatorVisitor::OUT_imm(u64) { ThrowNotImplemented(Opcode::OUT_imm); } -void TranslatorVisitor::P2R_reg(u64) { - ThrowNotImplemented(Opcode::P2R_reg); -} - -void TranslatorVisitor::P2R_cbuf(u64) { - ThrowNotImplemented(Opcode::P2R_cbuf); -} - -void TranslatorVisitor::P2R_imm(u64) { - ThrowNotImplemented(Opcode::P2R_imm); -} - void TranslatorVisitor::PBK() { // PBK is a no-op } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 49ff911d6..b25af6cd3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -191,12 +191,12 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; - + /* FILE* file = fopen("D:\\shader.spv", "wb"); fwrite(code.data(), 4, code.size(), file); fclose(file); std::system("spirv-dis D:\\shader.spv"); - + */ shader_info->unique_hash = env.ComputeHash(); shader_info->size_bytes = env.ShaderSize(); return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info, -- cgit v1.2.3 From 274897dfd59b4d08029ab7e93be4f84654abcdc8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Feb 2021 23:42:38 -0300 Subject: spirv: Fixes and Intel specific workarounds --- src/shader_recompiler/backend/spirv/emit_context.cpp | 3 ++- src/shader_recompiler/backend/spirv/emit_context.h | 5 ++++- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 6 +++--- src/shader_recompiler/backend/spirv/emit_spirv.h | 8 ++++---- .../backend/spirv/emit_spirv_floating_point.cpp | 13 ++++++++++--- .../backend/spirv/emit_spirv_logical.cpp | 16 ++++++++-------- .../frontend/ir/structured_control_flow.cpp | 3 --- src/shader_recompiler/frontend/maxwell/program.cpp | 3 --- .../frontend/maxwell/translate/impl/impl.cpp | 15 +++++++++------ src/shader_recompiler/profile.h | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 11 files changed, 44 insertions(+), 32 deletions(-) (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index ea1c8a3be..d2dbd56d4 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -25,7 +25,8 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(IR::Program& program) : Sirit::Module(0x00010000) { +EmitContext::EmitContext(const Profile& profile_, IR::Program& program) + : Sirit::Module(0x00010000), profile{profile_} { AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 8de203da2..d20cf387e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -11,6 +11,7 @@ #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/shader_info.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::SPIRV { @@ -30,11 +31,13 @@ private: class EmitContext final : public Sirit::Module { public: - explicit EmitContext(IR::Program& program); + explicit EmitContext(const Profile& profile, IR::Program& program); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); + const Profile& profile; + Id void_id{}; Id U1{}; Id U16{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 2519e446a..f3aca90d0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -150,11 +150,11 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } else if (info.uses_fp16_denorms_flush) { if (profile.support_fp16_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); - ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U); } else { // Same as fp32, no need to warn as most drivers will flush by default } - } else if (info.uses_fp32_denorms_preserve) { + } else if (info.uses_fp16_denorms_preserve) { if (profile.support_fp16_denorm_preserve) { ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); @@ -166,7 +166,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit } // Anonymous namespace std::vector EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { - EmitContext ctx{program}; + EmitContext ctx{profile, program}; const Id void_function{ctx.TypeFunction(ctx.void_id)}; // FIXME: Forward declare functions (needs sirit support) Id func{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 922e294a7..cec80c13e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -202,10 +202,10 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -void EmitLogicalOr(EmitContext& ctx); -void EmitLogicalAnd(EmitContext& ctx); -void EmitLogicalXor(EmitContext& ctx); -void EmitLogicalNot(EmitContext& ctx); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); +Id EmitLogicalNot(EmitContext& ctx, Id value); Id EmitConvertS16F16(EmitContext& ctx, Id value); Id EmitConvertS16F32(EmitContext& ctx, Id value); Id EmitConvertS16F64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 47f87054b..5d0b74f9b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -15,6 +15,13 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { return op; } +Id Saturate(EmitContext& ctx, Id type, Id value, Id zero, Id one) { + if (ctx.profile.has_broken_spirv_clamp) { + return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one); + } else { + return ctx.OpFClamp(type, value, zero, one); + } +} } // Anonymous namespace Id EmitFPAbs16(EmitContext& ctx, Id value) { @@ -144,19 +151,19 @@ void EmitFPLog2(EmitContext&) { Id EmitFPSaturate16(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; - return ctx.OpFClamp(ctx.F32[1], value, zero, one); + return Saturate(ctx, ctx.F16[1], value, zero, one); } Id EmitFPSaturate32(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; - return ctx.OpFClamp(ctx.F32[1], value, zero, one); + return Saturate(ctx, ctx.F32[1], value, zero, one); } Id EmitFPSaturate64(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; - return ctx.OpFClamp(ctx.F64[1], value, zero, one); + return Saturate(ctx, ctx.F64[1], value, zero, one); } Id EmitFPRoundEven16(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index c5a07252f..bb434def2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -6,20 +6,20 @@ namespace Shader::Backend::SPIRV { -void EmitLogicalOr(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalOr(ctx.U1, a, b); } -void EmitLogicalAnd(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalAnd(ctx.U1, a, b); } -void EmitLogicalXor(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) { + return ctx.OpLogicalNotEqual(ctx.U1, a, b); } -void EmitLogicalNot(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitLogicalNot(EmitContext& ctx, Id value) { + return ctx.OpLogicalNot(ctx.U1, value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp index d145095d1..032ac8fda 100644 --- a/src/shader_recompiler/frontend/ir/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/ir/structured_control_flow.cpp @@ -272,11 +272,9 @@ public: explicit GotoPass(std::span blocks, ObjectPool& stmt_pool) : pool{stmt_pool} { std::vector gotos{BuildUnorderedTreeGetGotos(blocks)}; - fmt::print(stdout, "BEFORE\n{}\n", DumpTree(root_stmt.children)); for (const Node& goto_stmt : gotos | std::views::reverse) { RemoveGoto(goto_stmt); } - fmt::print(stdout, "AFTER\n{}\n", DumpTree(root_stmt.children)); } Statement& RootStatement() noexcept { @@ -548,7 +546,6 @@ private: size_t Offset(ConstNode stmt) const { size_t offset{0}; if (!SearchNode(root_stmt.children, stmt, offset)) { - fmt::print(stdout, "{}\n", DumpTree(root_stmt.children)); throw LogicError("Node not found in tree"); } return offset; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ed5dbf41f..dbfc04f75 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -56,7 +56,6 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const s32 positive_value{static_cast(imm.value)}; - const s32 value{imm.is_negative != 0 ? -positive_value : positive_value}; - return ir.Imm32(value); + if (imm.is_negative != 0) { + const s64 raw{static_cast(imm.value)}; + return ir.Imm32(static_cast(-(1LL << 19) + raw)); + } else { + return ir.Imm32(static_cast(imm.value)); + } } IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { @@ -94,9 +97,9 @@ IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { BitField<20, 19, u64> value; BitField<56, 1, u64> is_negative; } const imm{insn}; - const f32 positive_value{Common::BitCast(static_cast(imm.value) << 12)}; - const f32 value{imm.is_negative != 0 ? -positive_value : positive_value}; - return ir.Imm32(value); + const u32 sign_bit{imm.is_negative != 0 ? (1ULL << 31) : 0}; + const u32 value{static_cast(imm.value) << 12}; + return ir.Imm32(Common::BitCast(value | sign_bit)); } IR::U32 TranslatorVisitor::GetImm32(u64 insn) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 9881bebab..917fc1251 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -15,6 +15,9 @@ struct Profile { bool support_fp32_denorm_preserve{}; bool support_fp16_denorm_flush{}; bool support_fp32_denorm_flush{}; + + // FClamp is broken and OpFMax + OpFMin should be used instead + bool has_broken_spirv_clamp{}; }; } // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b25af6cd3..2497c2385 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -189,6 +189,7 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, + .has_broken_spirv_clamp = true, // TODO: is_intel }; const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; /* -- cgit v1.2.3 From e44752ddc8804961eb84f8c225bb36d5b4c77bc1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 22 Feb 2021 22:59:16 -0300 Subject: shader: FMUL, select, RRO, and MUFU fixes --- src/shader_recompiler/CMakeLists.txt | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 67 +++++-- .../backend/spirv/emit_spirv_floating_point.cpp | 192 +++++++++++++++++---- .../backend/spirv/emit_spirv_select.cpp | 21 ++- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 144 ++++++++++++++-- src/shader_recompiler/frontend/ir/ir_emitter.h | 18 +- src/shader_recompiler/frontend/ir/opcodes.inc | 50 +++++- .../maxwell/translate/impl/common_encoding.h | 3 +- .../maxwell/translate/impl/floating_point_add.cpp | 2 +- .../impl/floating_point_fused_multiply_add.cpp | 4 +- .../impl/floating_point_multi_function.cpp | 8 +- .../translate/impl/floating_point_multiply.cpp | 42 +++-- .../impl/floating_point_range_reduction.cpp | 41 +++++ .../frontend/maxwell/translate/impl/impl.cpp | 12 +- .../frontend/maxwell/translate/impl/impl.h | 5 +- .../maxwell/translate/impl/integer_shift_left.cpp | 2 +- .../maxwell/translate/impl/not_implemented.cpp | 12 -- .../ir_opt/constant_propagation_pass.cpp | 2 +- 18 files changed, 507 insertions(+), 119 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 802527255..5574feaa6 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp frontend/maxwell/translate/impl/floating_point_multiply.cpp + frontend/maxwell/translate/impl/floating_point_range_reduction.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/integer_add.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 1b9be445e..130c71996 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -108,10 +108,12 @@ void EmitCompositeConstructF64x4(EmitContext& ctx); void EmitCompositeExtractF64x2(EmitContext& ctx); void EmitCompositeExtractF64x3(EmitContext& ctx); void EmitCompositeExtractF64x4(EmitContext& ctx); -void EmitSelect8(EmitContext& ctx); -void EmitSelect16(EmitContext& ctx); -Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -void EmitSelect64(EmitContext& ctx); +Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); void EmitBitCastU16F16(EmitContext& ctx); Id EmitBitCastU32F32(EmitContext& ctx, Id value); void EmitBitCastU64F64(EmitContext& ctx); @@ -149,18 +151,15 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPNeg16(EmitContext& ctx, Id value); Id EmitFPNeg32(EmitContext& ctx, Id value); Id EmitFPNeg64(EmitContext& ctx, Id value); -void EmitFPRecip32(EmitContext& ctx); -void EmitFPRecip64(EmitContext& ctx); -void EmitFPRecipSqrt32(EmitContext& ctx); -void EmitFPRecipSqrt64(EmitContext& ctx); -void EmitFPSqrt(EmitContext& ctx); -void EmitFPSin(EmitContext& ctx); -void EmitFPSinNotReduced(EmitContext& ctx); -void EmitFPExp2(EmitContext& ctx); -void EmitFPExp2NotReduced(EmitContext& ctx); -void EmitFPCos(EmitContext& ctx); -void EmitFPCosNotReduced(EmitContext& ctx); -void EmitFPLog2(EmitContext& ctx); +Id EmitFPSin(EmitContext& ctx, Id value); +Id EmitFPCos(EmitContext& ctx, Id value); +Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPLog2(EmitContext& ctx, Id value); +Id EmitFPRecip32(EmitContext& ctx, Id value); +Id EmitFPRecip64(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); +Id EmitFPSqrt(EmitContext& ctx, Id value); Id EmitFPSaturate16(EmitContext& ctx, Id value); Id EmitFPSaturate32(EmitContext& ctx, Id value); Id EmitFPSaturate64(EmitContext& ctx, Id value); @@ -176,6 +175,42 @@ Id EmitFPCeil64(EmitContext& ctx, Id value); Id EmitFPTrunc16(EmitContext& ctx, Id value); Id EmitFPTrunc32(EmitContext& ctx, Id value); Id EmitFPTrunc64(EmitContext& ctx, Id value); +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); void EmitIAdd64(EmitContext& ctx); Id EmitISub32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 5d0b74f9b..749f11742 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -100,52 +100,40 @@ Id EmitFPNeg64(EmitContext& ctx, Id value) { return ctx.OpFNegate(ctx.F64[1], value); } -void EmitFPRecip32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitFPRecip64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSin(EmitContext& ctx, Id value) { + return ctx.OpSin(ctx.F32[1], value); } -void EmitFPRecipSqrt32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPCos(EmitContext& ctx, Id value) { + return ctx.OpCos(ctx.F32[1], value); } -void EmitFPRecipSqrt64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPExp2(EmitContext& ctx, Id value) { + return ctx.OpExp2(ctx.F32[1], value); } -void EmitFPSqrt(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPLog2(EmitContext& ctx, Id value) { + return ctx.OpLog2(ctx.F32[1], value); } -void EmitFPSin(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRecip32(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F32[1], ctx.Constant(ctx.F32[1], 1.0f), value); } -void EmitFPSinNotReduced(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRecip64(EmitContext& ctx, Id value) { + return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], 1.0f), value); } -void EmitFPExp2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F32[1], value); } -void EmitFPExp2NotReduced(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) { + return ctx.OpInverseSqrt(ctx.F64[1], value); } -void EmitFPCos(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitFPCosNotReduced(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); -} - -void EmitFPLog2(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPSqrt(EmitContext& ctx, Id value) { + return ctx.OpSqrt(ctx.F32[1], value); } Id EmitFPSaturate16(EmitContext& ctx, Id value) { @@ -214,4 +202,148 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) { return ctx.OpTrunc(ctx.F64[1], value); } +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); +} + +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index eb1926a4d..21cca4455 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -6,20 +6,29 @@ namespace Shader::Backend::SPIRV { -void EmitSelect8(EmitContext&) { +Id EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Id cond, + [[maybe_unused]] Id true_value, [[maybe_unused]] Id false_value) { throw NotImplementedException("SPIR-V Instruction"); } -void EmitSelect16(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U16, cond, true_value, false_value); } -Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); } -void EmitSelect64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.U64, cond, true_value, false_value); +} + +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value); +} + +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { + return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 34c2f67fb..8ba86e614 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -361,19 +361,21 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) { } } -UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { +Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { if (true_value.Type() != false_value.Type()) { throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); } switch (true_value.Type()) { case Type::U8: - return Inst(Opcode::Select8, condition, true_value, false_value); + return Inst(Opcode::SelectU8, condition, true_value, false_value); case Type::U16: - return Inst(Opcode::Select16, condition, true_value, false_value); + return Inst(Opcode::SelectU16, condition, true_value, false_value); case Type::U32: - return Inst(Opcode::Select32, condition, true_value, false_value); + return Inst(Opcode::SelectU32, condition, true_value, false_value); case Type::U64: - return Inst(Opcode::Select64, condition, true_value, false_value); + return Inst(Opcode::SelectU64, condition, true_value, false_value); + case Type::F32: + return Inst(Opcode::SelectF32, condition, true_value, false_value); default: throw InvalidArgument("Invalid type {}", true_value.Type()); } @@ -503,12 +505,16 @@ F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { return result; } -F32 IREmitter::FPCosNotReduced(const F32& value) { - return Inst(Opcode::FPCosNotReduced, value); +F32 IREmitter::FPCos(const F32& value) { + return Inst(Opcode::FPCos, value); +} + +F32 IREmitter::FPSin(const F32& value) { + return Inst(Opcode::FPSin, value); } -F32 IREmitter::FPExp2NotReduced(const F32& value) { - return Inst(Opcode::FPExp2NotReduced, value); +F32 IREmitter::FPExp2(const F32& value) { + return Inst(Opcode::FPExp2, value); } F32 IREmitter::FPLog2(const F32& value) { @@ -517,9 +523,9 @@ F32 IREmitter::FPLog2(const F32& value) { F32F64 IREmitter::FPRecip(const F32F64& value) { switch (value.Type()) { - case Type::U32: + case Type::F32: return Inst(Opcode::FPRecip32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPRecip64, value); default: ThrowInvalidType(value.Type()); @@ -528,19 +534,15 @@ F32F64 IREmitter::FPRecip(const F32F64& value) { F32F64 IREmitter::FPRecipSqrt(const F32F64& value) { switch (value.Type()) { - case Type::U32: + case Type::F32: return Inst(Opcode::FPRecipSqrt32, value); - case Type::U64: + case Type::F64: return Inst(Opcode::FPRecipSqrt64, value); default: ThrowInvalidType(value.Type()); } } -F32 IREmitter::FPSinNotReduced(const F32& value) { - return Inst(Opcode::FPSinNotReduced, value); -} - F32 IREmitter::FPSqrt(const F32& value) { return Inst(Opcode::FPSqrt, value); } @@ -610,6 +612,114 @@ F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { } } +U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, lhs, + rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, lhs, + rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, lhs, + rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16, + lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32, + lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64, + lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F16: + return Inst(ordered ? Opcode::FPOrdGreaterThanEqual16 + : Opcode::FPUnordGreaterThanEqual16, + lhs, rhs); + case Type::F32: + return Inst(ordered ? Opcode::FPOrdGreaterThanEqual32 + : Opcode::FPUnordGreaterThanEqual32, + lhs, rhs); + case Type::F64: + return Inst(ordered ? Opcode::FPOrdGreaterThanEqual64 + : Opcode::FPUnordGreaterThanEqual64, + lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 959f4f9da..2c923716a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -98,7 +98,8 @@ public: const Value& e4); [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); - [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); + [[nodiscard]] Value Select(const U1& condition, const Value& true_value, + const Value& false_value); template [[nodiscard]] Dest BitCast(const Source& value); @@ -121,12 +122,12 @@ public: [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value); [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg); - [[nodiscard]] F32 FPCosNotReduced(const F32& value); - [[nodiscard]] F32 FPExp2NotReduced(const F32& value); + [[nodiscard]] F32 FPCos(const F32& value); + [[nodiscard]] F32 FPSin(const F32& value); + [[nodiscard]] F32 FPExp2(const F32& value); [[nodiscard]] F32 FPLog2(const F32& value); [[nodiscard]] F32F64 FPRecip(const F32F64& value); [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); - [[nodiscard]] F32 FPSinNotReduced(const F32& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); @@ -134,6 +135,15 @@ public: [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); + [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, + bool ordered = true); + [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, + bool ordered = true); + [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMul(const U32& a, const U32& b); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 50da77535..f2d71144a 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -103,10 +103,12 @@ OPCODE(CompositeExtractF64x3, F64, F64x OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) // Select operations -OPCODE(Select8, U8, U1, U8, U8, ) -OPCODE(Select16, U16, U1, U16, U16, ) -OPCODE(Select32, U32, U1, U32, U32, ) -OPCODE(Select64, U64, U1, U64, U64, ) +OPCODE(SelectU8, U8, U1, U8, U8, ) +OPCODE(SelectU16, U16, U1, U16, U16, ) +OPCODE(SelectU32, U32, U1, U32, U32, ) +OPCODE(SelectU64, U64, U1, U64, U64, ) +OPCODE(SelectF16, F16, U1, F16, F16, ) +OPCODE(SelectF32, F32, U1, F32, F32, ) // Bitwise conversions OPCODE(BitCastU16F16, U16, F16, ) @@ -156,11 +158,8 @@ OPCODE(FPRecipSqrt32, F32, F32, OPCODE(FPRecipSqrt64, F64, F64, ) OPCODE(FPSqrt, F32, F32, ) OPCODE(FPSin, F32, F32, ) -OPCODE(FPSinNotReduced, F32, F32, ) OPCODE(FPExp2, F32, F32, ) -OPCODE(FPExp2NotReduced, F32, F32, ) OPCODE(FPCos, F32, F32, ) -OPCODE(FPCosNotReduced, F32, F32, ) OPCODE(FPLog2, F32, F32, ) OPCODE(FPSaturate16, F16, F16, ) OPCODE(FPSaturate32, F32, F32, ) @@ -178,6 +177,43 @@ OPCODE(FPTrunc16, F16, F16, OPCODE(FPTrunc32, F32, F32, ) OPCODE(FPTrunc64, F64, F64, ) +OPCODE(FPOrdEqual16, U1, F16, F16, ) +OPCODE(FPOrdEqual32, U1, F32, F32, ) +OPCODE(FPOrdEqual64, U1, F64, F64, ) +OPCODE(FPUnordEqual16, U1, F16, F16, ) +OPCODE(FPUnordEqual32, U1, F32, F32, ) +OPCODE(FPUnordEqual64, U1, F64, F64, ) +OPCODE(FPOrdNotEqual16, U1, F16, F16, ) +OPCODE(FPOrdNotEqual32, U1, F32, F32, ) +OPCODE(FPOrdNotEqual64, U1, F64, F64, ) +OPCODE(FPUnordNotEqual16, U1, F16, F16, ) +OPCODE(FPUnordNotEqual32, U1, F32, F32, ) +OPCODE(FPUnordNotEqual64, U1, F64, F64, ) +OPCODE(FPOrdLessThan16, U1, F16, F16, ) +OPCODE(FPOrdLessThan32, U1, F32, F32, ) +OPCODE(FPOrdLessThan64, U1, F64, F64, ) +OPCODE(FPUnordLessThan16, U1, F16, F16, ) +OPCODE(FPUnordLessThan32, U1, F32, F32, ) +OPCODE(FPUnordLessThan64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) +OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) +OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) +OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) +OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) + // Integer operations OPCODE(IAdd32, U32, U32, U32, ) OPCODE(IAdd64, U64, U64, U64, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h index 3da37a2bb..fd73f656c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h @@ -46,7 +46,8 @@ inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { case FmzMode::FTZ: return IR::FmzMode::FTZ; case FmzMode::FMZ: - return IR::FmzMode::FMZ; + // FMZ is manually handled in the instruction + return IR::FmzMode::FTZ; case FmzMode::INVALIDFMZ3: break; } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp index 219ffcc6a..76a807d4e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp @@ -53,7 +53,7 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { } // Anonymous namespace void TranslatorVisitor::FADD_reg(u64 insn) { - FADD(*this, insn, GetRegFloat20(insn)); + FADD(*this, insn, GetFloatReg20(insn)); } void TranslatorVisitor::FADD_cbuf(u64 insn) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp index 758700d3c..c2ca0873b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp @@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s } // Anonymous namespace void TranslatorVisitor::FFMA_reg(u64 insn) { - FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn)); + FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); } void TranslatorVisitor::FFMA_rc(u64) { @@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) { } void TranslatorVisitor::FFMA_cr(u64 insn) { - FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn)); + FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); } void TranslatorVisitor::FFMA_imm(u64) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp index ba005fbf4..2f8605619 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp @@ -10,7 +10,7 @@ namespace Shader::Maxwell { namespace { -enum class Operation { +enum class Operation : u64 { Cos = 0, Sin = 1, Ex2 = 2, // Base 2 exponent @@ -39,11 +39,11 @@ void TranslatorVisitor::MUFU(u64 insn) { IR::F32 value{[&]() -> IR::F32 { switch (mufu.operation) { case Operation::Cos: - return ir.FPCosNotReduced(op_a); + return ir.FPCos(op_a); case Operation::Sin: - return ir.FPSinNotReduced(op_a); + return ir.FPSin(op_a); case Operation::Ex2: - return ir.FPExp2NotReduced(op_a); + return ir.FPExp2(op_a); case Operation::Lg2: return ir.FPLog2(op_a); case Operation::Rcp: diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp index 5c38d3fc1..edf2cadae 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp @@ -55,9 +55,6 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode if (cc) { throw NotImplementedException("FMUL CC"); } - if (sat) { - throw NotImplementedException("FMUL SAT"); - } IR::F32 op_a{v.F(fmul.src_a)}; if (scale != Scale::None) { if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { @@ -71,7 +68,20 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode .rounding{CastFpRounding(fp_rounding)}, .fmz_mode{CastFmzMode(fmz_mode)}, }; - v.F(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); + IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; + if (fmz_mode == FmzMode::FMZ && !sat) { + // Do not implement FMZ if SAT is enabled, as it does the logic for us. + // On D3D9 mode, anything * 0 is zero, even NAN and infinity + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; + const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; + const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; + value = IR::F32{v.ir.Select(any_zero, zero, value)}; + } + if (sat) { + value = v.ir.FPSaturate(value); + } + v.F(fmul.dest_reg, value); } void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { @@ -83,27 +93,33 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { BitField<47, 1, u64> cc; BitField<48, 1, u64> neg_b; BitField<50, 1, u64> sat; - } fmul{insn}; - + } const fmul{insn}; FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, fmul.neg_b != 0); } } // Anonymous namespace void TranslatorVisitor::FMUL_reg(u64 insn) { - return FMUL(*this, insn, GetRegFloat20(insn)); + return FMUL(*this, insn, GetFloatReg20(insn)); } -void TranslatorVisitor::FMUL_cbuf(u64) { - throw NotImplementedException("FMUL (cbuf)"); +void TranslatorVisitor::FMUL_cbuf(u64 insn) { + return FMUL(*this, insn, GetFloatCbuf(insn)); } -void TranslatorVisitor::FMUL_imm(u64) { - throw NotImplementedException("FMUL (imm)"); +void TranslatorVisitor::FMUL_imm(u64 insn) { + return FMUL(*this, insn, GetFloatImm20(insn)); } -void TranslatorVisitor::FMUL32I(u64) { - throw NotImplementedException("FMUL32I"); +void TranslatorVisitor::FMUL32I(u64 insn) { + union { + u64 raw; + BitField<52, 1, u64> cc; + BitField<53, 2, FmzMode> fmz; + BitField<55, 1, u64> sat; + } const fmul32i{insn}; + FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, + fmul32i.sat != 0, fmul32i.cc != 0, false); } } // namespace Shader::Maxwell \ No newline at end of file diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp new file mode 100644 index 000000000..f91b93fad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp @@ -0,0 +1,41 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + SINCOS, + EX2, +}; + +void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) { + union { + u64 raw; + BitField<0, 8, IR::Reg> dest_reg; + BitField<39, 1, Mode> mode; + BitField<45, 1, u64> neg; + BitField<49, 1, u64> abs; + } const rro{insn}; + + v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0)); +} +} // Anonymous namespace + +void TranslatorVisitor::RRO_reg(u64 insn) { + RRO(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::RRO_cbuf(u64 insn) { + RRO(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::RRO_imm(u64) { + throw NotImplementedException("RRO (imm)"); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 165d475b9..a5a0e1a9b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) { return X(reg.index); } -IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { return ir.BitCast(GetReg20(insn)); } -IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) { +IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { return ir.BitCast(GetReg39(insn)); } @@ -110,6 +110,14 @@ IR::U32 TranslatorVisitor::GetImm32(u64 insn) { return ir.Imm32(static_cast(imm.value)); } +IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) { + union { + u64 raw; + BitField<20, 32, u64> value; + } const imm{insn}; + return ir.Imm32(Common::BitCast(static_cast(imm.value))); +} + void TranslatorVisitor::SetZFlag(const IR::U1& value) { ir.SetZFlag(value); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 4d4cf2ebf..4e722e205 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -304,8 +304,8 @@ public: [[nodiscard]] IR::U32 GetReg8(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn); - [[nodiscard]] IR::F32 GetRegFloat20(u64 insn); - [[nodiscard]] IR::F32 GetRegFloat39(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); + [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); @@ -314,6 +314,7 @@ public: [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); [[nodiscard]] IR::U32 GetImm32(u64 insn); + [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); void SetZFlag(const IR::U1& value); void SetSFlag(const IR::U1& value); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp index d8a5158b5..20af68852 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp @@ -50,7 +50,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { // const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; - result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); + result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))}; } v.X(shl.dest_reg, result); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 628cf1c14..4114e10be 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -721,18 +721,6 @@ void TranslatorVisitor::RET(u64) { ThrowNotImplemented(Opcode::RET); } -void TranslatorVisitor::RRO_reg(u64) { - ThrowNotImplemented(Opcode::RRO_reg); -} - -void TranslatorVisitor::RRO_cbuf(u64) { - ThrowNotImplemented(Opcode::RRO_cbuf); -} - -void TranslatorVisitor::RRO_imm(u64) { - ThrowNotImplemented(Opcode::RRO_imm); -} - void TranslatorVisitor::RTT(u64) { ThrowNotImplemented(Opcode::RTT); } diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 4d4e88259..ae3d5a7d6 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp @@ -330,7 +330,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldBitCast(inst, IR::Opcode::BitCastF32U32); case IR::Opcode::IAdd64: return FoldAdd(block, inst); - case IR::Opcode::Select32: + case IR::Opcode::SelectU32: return FoldSelect(inst); case IR::Opcode::LogicalAnd: return FoldLogicalAnd(inst); -- cgit v1.2.3 From ba8c1d2eb479d04b2b0d847efd67468b688765d4 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 10 Mar 2021 22:42:17 -0500 Subject: shader: Implement FCMP still need to configure some settings for NV denorm flush and intel NaN --- src/shader_recompiler/CMakeLists.txt | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 1 + .../backend/spirv/emit_spirv_floating_point.cpp | 4 + src/shader_recompiler/frontend/ir/ir_emitter.cpp | 80 +++++++++----- src/shader_recompiler/frontend/ir/ir_emitter.h | 19 ++-- src/shader_recompiler/frontend/ir/opcodes.inc | 2 + .../translate/impl/floating_point_compare.cpp | 116 +++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 16 --- .../ir_opt/collect_shader_info_pass.cpp | 14 ++- 9 files changed, 203 insertions(+), 50 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 755db5dfa..b45ff53b6 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -67,6 +67,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/common_funcs.h frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp + frontend/maxwell/translate/impl/floating_point_compare.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index aafc59bbb..b09978073 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -232,6 +232,7 @@ Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan32(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); void EmitIAdd64(EmitContext& ctx); Id EmitISub32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 749f11742..a359c42fc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -346,4 +346,8 @@ Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); } +Id EmitFPIsNan32(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 33819dd36..5d475207e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -697,93 +697,107 @@ F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { } } -U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control}, + lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control}, + lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control}, + lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, + Flags{control}, lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, + Flags{control}, lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, + Flags{control}, lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, + Flags{control}, lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, lhs, rhs); + return Inst(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: - return Inst(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, lhs, - rhs); + return Inst(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, + Flags{control}, lhs, rhs); case Type::F32: - return Inst(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, lhs, - rhs); + return Inst(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, + Flags{control}, lhs, rhs); case Type::F64: - return Inst(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, lhs, - rhs); + return Inst(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F16: return Inst(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F32: return Inst(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F64: return Inst(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64, - lhs, rhs); + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) { +U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, + bool ordered) { if (lhs.Type() != rhs.Type()) { throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } @@ -791,20 +805,32 @@ U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, boo case Type::F16: return Inst(ordered ? Opcode::FPOrdGreaterThanEqual16 : Opcode::FPUnordGreaterThanEqual16, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F32: return Inst(ordered ? Opcode::FPOrdGreaterThanEqual32 : Opcode::FPUnordGreaterThanEqual32, - lhs, rhs); + Flags{control}, lhs, rhs); case Type::F64: return Inst(ordered ? Opcode::FPOrdGreaterThanEqual64 : Opcode::FPUnordGreaterThanEqual64, - lhs, rhs); + Flags{control}, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } +U1 IREmitter::FPIsNan(const F32& value) { + return Inst(Opcode::FPIsNan32, value); +} + +U1 IREmitter::FPOrdered(const F32& lhs, const F32& rhs) { + return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); +} + +U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) { + return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); +} + U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index e4d110540..5cfe1a54a 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -140,14 +140,21 @@ public: [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); - [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); - [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); - [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); - [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true); + [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, + bool ordered = true); + [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, + FpControl control = {}, bool ordered = true); [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, - bool ordered = true); + FpControl control = {}, bool ordered = true); [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, - bool ordered = true); + FpControl control = {}, bool ordered = true); + [[nodiscard]] U1 FPIsNan(const F32& value); + [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs); + [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 64bd495ed..476281789 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -197,6 +197,7 @@ OPCODE(FPTrunc16, F16, F16, OPCODE(FPTrunc32, F32, F32, ) OPCODE(FPTrunc64, F64, F64, ) +<<<<<<< HEAD OPCODE(FPOrdEqual16, U1, F16, F16, ) OPCODE(FPOrdEqual32, U1, F32, F32, ) OPCODE(FPOrdEqual64, U1, F64, F64, ) @@ -233,6 +234,7 @@ OPCODE(FPOrdGreaterThanEqual64, U1, F64, OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPIsNan32, U1, F32, ) // Integer operations OPCODE(IAdd32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp new file mode 100644 index 000000000..21cb80d67 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -0,0 +1,116 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +enum class FPCompareOp : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + Nan, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, +}; + +bool IsCompareOpOrdered(FPCompareOp op) { + switch (op) { + case FPCompareOp::LTU: + case FPCompareOp::EQU: + case FPCompareOp::LEU: + case FPCompareOp::GTU: + case FPCompareOp::NEU: + case FPCompareOp::GEU: + return false; + default: + return true; + } +} + +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, + FPCompareOp compare_op, IR::FpControl control) { + const bool ordered{IsCompareOpOrdered(compare_op)}; + switch (compare_op) { + case FPCompareOp::F: + return ir.Imm1(false); + case FPCompareOp::LT: + case FPCompareOp::LTU: + return ir.FPLessThan(operand_1, operand_2, control, ordered); + case FPCompareOp::EQ: + case FPCompareOp::EQU: + return ir.FPEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::LE: + case FPCompareOp::LEU: + return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GT: + case FPCompareOp::GTU: + return ir.FPGreaterThan(operand_1, operand_2, control, ordered); + case FPCompareOp::NE: + case FPCompareOp::NEU: + return ir.FPNotEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GE: + case FPCompareOp::GEU: + return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::NUM: + return ir.FPOrdered(operand_1, operand_2); + case FPCompareOp::Nan: + return ir.FPUnordered(operand_1, operand_2); + case FPCompareOp::T: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid compare op {}", compare_op); + } +} + +void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_reg; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fcmp{insn}; + + const IR::F32 zero{v.ir.Imm32(0.0f)}; + const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; + IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; + const IR::U32 src_reg{v.X(fcmp.src_reg)}; + const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; + + v.X(fcmp.dest_reg, result); +} +} // Anonymous namespace + +void TranslatorVisitor::FCMP_reg(u64 insn) { + FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_rc(u64 insn) { + FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FCMP_cr(u64 insn) { + FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn)); +} + +void TranslatorVisitor::FCMP_imm(u64 insn) { + FCMP(*this, insn, GetReg39(insn), GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 5b153acff..e1904472f 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,22 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FCMP_reg(u64) { - ThrowNotImplemented(Opcode::FCMP_reg); -} - -void TranslatorVisitor::FCMP_rc(u64) { - ThrowNotImplemented(Opcode::FCMP_rc); -} - -void TranslatorVisitor::FCMP_cr(u64) { - ThrowNotImplemented(Opcode::FCMP_cr); -} - -void TranslatorVisitor::FCMP_imm(u64) { - ThrowNotImplemented(Opcode::FCMP_imm); -} - void TranslatorVisitor::FMNMX_reg(u64) { ThrowNotImplemented(Opcode::FMNMX_reg); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index cdbe85221..70d75ad6c 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -256,7 +256,19 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven32: case IR::Opcode::FPFloor32: case IR::Opcode::FPCeil32: - case IR::Opcode::FPTrunc32: { + case IR::Opcode::FPTrunc32: + case IR::Opcode::FPOrdEqual32: + case IR::Opcode::FPUnordEqual32: + case IR::Opcode::FPOrdNotEqual32: + case IR::Opcode::FPUnordNotEqual32: + case IR::Opcode::FPOrdLessThan32: + case IR::Opcode::FPUnordLessThan32: + case IR::Opcode::FPOrdGreaterThan32: + case IR::Opcode::FPUnordGreaterThan32: + case IR::Opcode::FPOrdLessThanEqual32: + case IR::Opcode::FPUnordLessThanEqual32: + case IR::Opcode::FPOrdGreaterThanEqual32: + case IR::Opcode::FPUnordGreaterThanEqual32: { const auto control{inst.Flags()}; switch (control.fmz_mode) { case IR::FmzMode::DontCare: -- cgit v1.2.3 From 8d470c2e63c2dac334ccff2bcda9a0607ce76377 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 14 Mar 2021 01:23:56 -0500 Subject: shader: Implement FMNMX And add a const in FCMP --- src/shader_recompiler/CMakeLists.txt | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 8 +-- .../backend/spirv/emit_spirv_floating_point.cpp | 16 +++--- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 28 +++++++++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 2 + .../translate/impl/floating_point_compare.cpp | 2 +- .../translate/impl/floating_point_min_max.cpp | 57 ++++++++++++++++++++++ .../maxwell/translate/impl/not_implemented.cpp | 12 ----- 8 files changed, 101 insertions(+), 25 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index b45ff53b6..171fdd321 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -70,6 +70,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/floating_point_compare.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp + frontend/maxwell/translate/impl/floating_point_min_max.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/floating_point_range_reduction.cpp diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index b09978073..89566c83d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -162,10 +162,10 @@ Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -void EmitFPMax32(EmitContext& ctx); -void EmitFPMax64(EmitContext& ctx); -void EmitFPMin32(EmitContext& ctx); -void EmitFPMin64(EmitContext& ctx); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b); Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index a359c42fc..e635b1ffb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -60,20 +60,20 @@ Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); } -void EmitFPMax32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMax(ctx.F32[1], a, b); } -void EmitFPMax64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMax(ctx.F64[1], a, b); } -void EmitFPMin32(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F32[1], a, b); } -void EmitFPMin64(EmitContext&) { - throw NotImplementedException("SPIR-V Instruction"); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b) { + return ctx.OpFMin(ctx.F64[1], a, b); } Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 5d475207e..556961fa4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -831,6 +831,34 @@ U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) { return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); } +F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(Opcode::FPMax32, Flags{control}, lhs, rhs); + case Type::F64: + return Inst(Opcode::FPMax64, Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + +F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::F32: + return Inst(Opcode::FPMin32, Flags{control}, lhs, rhs); + case Type::F64: + return Inst(Opcode::FPMin64, Flags{control}, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } +} + U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { if (a.Type() != b.Type()) { throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 5cfe1a54a..74fb3dbcb 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -155,6 +155,8 @@ public: [[nodiscard]] U1 FPIsNan(const F32& value); [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs); [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs); + [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); + [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index 21cb80d67..f254ecb3a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -88,7 +88,7 @@ void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& o const IR::F32 zero{v.ir.Imm32(0.0f)}; const IR::F32 neg_zero{v.ir.Imm32(-0.0f)}; - IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; + const IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}}; const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; const IR::U32 src_reg{v.X(fcmp.src_reg)}; const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp new file mode 100644 index 000000000..c3180a9bd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp @@ -0,0 +1,57 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<44, 1, u64> ftz; + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + } const fmnmx{insn}; + + const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0); + + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; + IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; + + if (fmnmx.neg_pred != 0) { + std::swap(min, max); + } + + v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)}); +} +} // Anonymous namespace + +void TranslatorVisitor::FMNMX_reg(u64 insn) { + FMNMX(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FMNMX_cbuf(u64 insn) { + FMNMX(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FMNMX_imm(u64 insn) { + FMNMX(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index e1904472f..01ecbb4cc 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,18 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FMNMX_reg(u64) { - ThrowNotImplemented(Opcode::FMNMX_reg); -} - -void TranslatorVisitor::FMNMX_cbuf(u64) { - ThrowNotImplemented(Opcode::FMNMX_cbuf); -} - -void TranslatorVisitor::FMNMX_imm(u64) { - ThrowNotImplemented(Opcode::FMNMX_imm); -} - void TranslatorVisitor::FSET_reg(u64) { ThrowNotImplemented(Opcode::FSET_reg); } -- cgit v1.2.3 From a77e764726938a26803fa90a9c69ccdd32ab09cd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 21 Mar 2021 00:42:56 -0300 Subject: shader: Add support for fp16 comparisons and misc fixes --- src/shader_recompiler/backend/spirv/emit_spirv.h | 2 ++ .../backend/spirv/emit_spirv_floating_point.cpp | 8 ++++++++ src/shader_recompiler/frontend/ir/ir_emitter.cpp | 23 ++++++++++++++++++---- src/shader_recompiler/frontend/ir/ir_emitter.h | 6 +++--- src/shader_recompiler/frontend/ir/opcodes.inc | 2 ++ .../translate/impl/half_floating_point_add.cpp | 3 +-- .../half_floating_point_fused_multiply_add.cpp | 4 +--- .../impl/half_floating_point_multiply.cpp | 3 +-- .../translate/impl/half_floating_point_set.cpp | 1 + .../ir_opt/collect_shader_info_pass.cpp | 16 +++++++++++++++ .../ir_opt/lower_fp16_to_fp32.cpp | 2 ++ 11 files changed, 56 insertions(+), 14 deletions(-) (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index e297a0e20..486ef10a7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -234,7 +234,9 @@ Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan16(EmitContext& ctx, Id value); Id EmitFPIsNan32(EmitContext& ctx, Id value); +Id EmitFPIsNan64(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); void EmitIAdd64(EmitContext& ctx); Id EmitISub32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index e635b1ffb..1fdf66cb6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -346,8 +346,16 @@ Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); } +Id EmitFPIsNan16(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + Id EmitFPIsNan32(EmitContext& ctx, Id value) { return ctx.OpIsNan(ctx.U1, value); } +Id EmitFPIsNan64(EmitContext& ctx, Id value) { + return ctx.OpIsNan(ctx.U1, value); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 652f6949e..1eda95071 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -895,15 +895,30 @@ U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpC } } -U1 IREmitter::FPIsNan(const F32& value) { - return Inst(Opcode::FPIsNan32, value); +U1 IREmitter::FPIsNan(const F16F32F64& value) { + switch (value.Type()) { + case Type::F16: + return Inst(Opcode::FPIsNan16, value); + case Type::F32: + return Inst(Opcode::FPIsNan32, value); + case Type::F64: + return Inst(Opcode::FPIsNan64, value); + default: + ThrowInvalidType(value.Type()); + } } -U1 IREmitter::FPOrdered(const F32& lhs, const F32& rhs) { +U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); } -U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) { +U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) { + if (lhs.Type() != rhs.Type()) { + throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 8edb11154..ab4537d88 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -161,9 +161,9 @@ public: FpControl control = {}, bool ordered = true); [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, bool ordered = true); - [[nodiscard]] U1 FPIsNan(const F32& value); - [[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs); - [[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs); + [[nodiscard]] U1 FPIsNan(const F16F32F64& value); + [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs); + [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs); [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8471db7b9..884eea7a8 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -236,7 +236,9 @@ OPCODE(FPOrdGreaterThanEqual64, U1, F64, OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) +OPCODE(FPIsNan16, U1, F16, ) OPCODE(FPIsNan32, U1, F32, ) +OPCODE(FPIsNan64, U1, F64, ) // Integer operations OPCODE(IAdd32, U32, U32, U32, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp index 19e3401ca..03e7bf047 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace { - void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { union { @@ -66,7 +65,7 @@ void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swi HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); } -} // namespace +} // Anonymous namespace void TranslatorVisitor::HADD2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp index 2f3996274..8b234bd6a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace { - void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, HalfPrecision precision) { @@ -85,8 +84,7 @@ void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizz HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, sat, precision); } - -} // namespace +} // Anonymous namespace void TranslatorVisitor::HFMA2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp index ff34a8c8f..2451a6ef6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp @@ -6,7 +6,6 @@ namespace Shader::Maxwell { namespace { - void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, HalfPrecision precision) { @@ -79,7 +78,7 @@ void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, boo HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, hmul2.precision); } -} // namespace +} // Anonymous namespace void TranslatorVisitor::HMUL2_reg(u64 insn) { union { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 1d28c0531..7f1f4b88c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -76,6 +76,7 @@ void TranslatorVisitor::HSET2_reg(u64 insn) { BitField<35, 4, FPCompareOp> compare_op; BitField<28, 2, Swizzle> swizzle_b; } const hset2{insn}; + HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); } diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index e7fa3fce0..fd6069c65 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -74,6 +74,9 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::CompositeExtractF16x2: case IR::Opcode::CompositeExtractF16x3: case IR::Opcode::CompositeExtractF16x4: + case IR::Opcode::CompositeInsertF16x2: + case IR::Opcode::CompositeInsertF16x3: + case IR::Opcode::CompositeInsertF16x4: case IR::Opcode::SelectF16: case IR::Opcode::BitCastU16F16: case IR::Opcode::BitCastF16U16: @@ -103,6 +106,19 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPRoundEven16: case IR::Opcode::FPSaturate16: case IR::Opcode::FPTrunc16: + case IR::Opcode::FPOrdEqual16: + case IR::Opcode::FPUnordEqual16: + case IR::Opcode::FPOrdNotEqual16: + case IR::Opcode::FPUnordNotEqual16: + case IR::Opcode::FPOrdLessThan16: + case IR::Opcode::FPUnordLessThan16: + case IR::Opcode::FPOrdGreaterThan16: + case IR::Opcode::FPUnordGreaterThan16: + case IR::Opcode::FPOrdLessThanEqual16: + case IR::Opcode::FPUnordLessThanEqual16: + case IR::Opcode::FPOrdGreaterThanEqual16: + case IR::Opcode::FPUnordGreaterThanEqual16: + case IR::Opcode::FPIsNan16: info.uses_fp16 = true; break; case IR::Opcode::FPAbs64: diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 7723c9a57..0e8862f45 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -74,6 +74,8 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::FPOrdGreaterThanEqual32; case IR::Opcode::FPUnordGreaterThanEqual16: return IR::Opcode::FPUnordGreaterThanEqual32; + case IR::Opcode::FPIsNan16: + return IR::Opcode::FPIsNan32; case IR::Opcode::ConvertS16F16: return IR::Opcode::ConvertS16F32; case IR::Opcode::ConvertS32F16: -- cgit v1.2.3 From 8cb9443cb99c4510e6ef26a91d09a31a8fa6281f Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Wed, 24 Mar 2021 00:02:30 +0100 Subject: shader: Fix F2I --- src/shader_recompiler/backend/spirv/emit_spirv.h | 3 + .../backend/spirv/emit_spirv_floating_point.cpp | 20 ++++- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 18 +++++ src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + src/shader_recompiler/frontend/ir/opcodes.inc | 3 + .../impl/floating_point_conversion_integer.cpp | 88 ++++++++++++++++++++-- .../frontend/maxwell/translate/impl/impl.cpp | 17 +++++ .../frontend/maxwell/translate/impl/impl.h | 2 + .../ir_opt/collect_shader_info_pass.cpp | 2 + .../ir_opt/lower_fp16_to_fp32.cpp | 2 + 10 files changed, 147 insertions(+), 9 deletions(-) (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 7fefcf2f2..6d4adafc7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -189,6 +189,9 @@ Id EmitFPSqrt(EmitContext& ctx, Id value); Id EmitFPSaturate16(EmitContext& ctx, Id value); Id EmitFPSaturate32(EmitContext& ctx, Id value); Id EmitFPSaturate64(EmitContext& ctx, Id value); +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); Id EmitFPRoundEven16(EmitContext& ctx, Id value); Id EmitFPRoundEven32(EmitContext& ctx, Id value); Id EmitFPRoundEven64(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 1fdf66cb6..24300af39 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -15,7 +15,7 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { return op; } -Id Saturate(EmitContext& ctx, Id type, Id value, Id zero, Id one) { +Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) { if (ctx.profile.has_broken_spirv_clamp) { return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one); } else { @@ -139,19 +139,31 @@ Id EmitFPSqrt(EmitContext& ctx, Id value) { Id EmitFPSaturate16(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; - return Saturate(ctx, ctx.F16[1], value, zero, one); + return Clamp(ctx, ctx.F16[1], value, zero, one); } Id EmitFPSaturate32(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; - return Saturate(ctx, ctx.F32[1], value, zero, one); + return Clamp(ctx, ctx.F32[1], value, zero, one); } Id EmitFPSaturate64(EmitContext& ctx, Id value) { const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; - return Saturate(ctx, ctx.F64[1], value, zero, one); + return Clamp(ctx, ctx.F64[1], value, zero, one); +} + +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F16[1], value, min_value, max_value); +} + +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F32[1], value, min_value, max_value); +} + +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) { + return Clamp(ctx, ctx.F64[1], value, min_value, max_value); } Id EmitFPRoundEven16(EmitContext& ctx, Id value) { diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index ce610799a..6280c08f6 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -731,6 +731,24 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { } } +F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value, + const F16F32F64& max_value) { + if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) { + throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(), + max_value.Type()); + } + switch (value.Type()) { + case Type::F16: + return Inst(Opcode::FPClamp16, value, min_value, max_value); + case Type::F32: + return Inst(Opcode::FPClamp32, value, min_value, max_value); + case Type::F64: + return Inst(Opcode::FPClamp64, value, min_value, max_value); + default: + ThrowInvalidType(value.Type()); + } +} + F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { switch (value.Type()) { case Type::F16: diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 39109b0de..ebbda78a9 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -147,6 +147,7 @@ public: [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); [[nodiscard]] F32 FPSqrt(const F32& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); + [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value); [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 8945c7b04..dd17212a1 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -192,6 +192,9 @@ OPCODE(FPLog2, F32, F32, OPCODE(FPSaturate16, F16, F16, ) OPCODE(FPSaturate32, F32, F32, ) OPCODE(FPSaturate64, F64, F64, ) +OPCODE(FPClamp16, F16, F16, F16, F16, ) +OPCODE(FPClamp32, F32, F32, F32, F32, ) +OPCODE(FPClamp64, F64, F64, F64, F64, ) OPCODE(FPRoundEven16, F16, F16, ) OPCODE(FPRoundEven32, F32, F32, ) OPCODE(FPRoundEven64, F64, F64, ) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp index 81175627f..7c5a72800 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "common/common_types.h" #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/maxwell/opcodes.h" @@ -55,6 +57,37 @@ size_t BitSize(DestFormat dest_format) { } } +std::pair ClampBounds(DestFormat format, bool is_signed) { + if (is_signed) { + switch (format) { + case DestFormat::I16: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I32: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I64: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + default: {} + } + } else { + switch (format) { + case DestFormat::I16: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I32: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + case DestFormat::I64: + return {static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::min())}; + default: {} + } + } + throw NotImplementedException("Invalid destination format {}", format); +} + IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { union { u64 raw; @@ -112,13 +145,58 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { // For example converting F32 65537.0 to U16, the expected value is 0xffff, const bool is_signed{f2i.is_signed != 0}; - const size_t bitsize{BitSize(f2i.dest_format)}; - const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)}; + const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); + + IR::F16F32F64 intermediate; + switch (f2i.src_format) { + case SrcFormat::F16: { + const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast(max_bound)))}; + const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast(min_bound)))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + case SrcFormat::F32: { + const IR::F32 max_val{v.ir.Imm32(static_cast(max_bound))}; + const IR::F32 min_val{v.ir.Imm32(static_cast(min_bound))}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + case SrcFormat::F64: { + const IR::F64 max_val{v.ir.Imm64(max_bound)}; + const IR::F64 min_val{v.ir.Imm64(min_bound)}; + intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); + break; + } + default: + throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value()); + } + + const size_t bitsize{std::max(32, BitSize(f2i.dest_format))}; + IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)}; + + bool handled_special_case = false; + const bool special_nan_cases = + (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64); + if (special_nan_cases) { + if (f2i.dest_format == DestFormat::I32) { + handled_special_case = true; + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)}; + } else if (f2i.dest_format == DestFormat::I64) { + handled_special_case = true; + result = IR::U64{ + v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; + } + } + if (!handled_special_case && is_signed) { + if (bitsize != 64) { + result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; + } else { + result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)}; + } + } if (bitsize == 64) { - const IR::Value vector{v.ir.UnpackUint2x32(result)}; - v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)}); - v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)}); + v.L(f2i.dest_reg, result); } else { v.X(f2i.dest_reg, result); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 758a0230a..9bae89c10 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -21,6 +21,13 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) { return ir.GetReg(reg); } +IR::U64 TranslatorVisitor::L(IR::Reg reg) { + if (!IR::IsAligned(reg, 2)) { + throw NotImplementedException("Unaligned source register {}", reg); + } + return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; +} + IR::F32 TranslatorVisitor::F(IR::Reg reg) { return ir.BitCast(X(reg)); } @@ -36,6 +43,16 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { ir.SetReg(dest_reg, value); } +void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { + if (!IR::IsAligned(dest_reg, 2)) { + throw NotImplementedException("Unaligned destination register {}", dest_reg); + } + const IR::Value result{ir.UnpackUint2x32(value)}; + for (int i = 0; i < 2; i++) { + X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)}); + } +} + void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { X(dest_reg, ir.BitCast(value)); } diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index c994fe803..54c31deb4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -341,10 +341,12 @@ public: void XMAD_imm(u64 insn); [[nodiscard]] IR::U32 X(IR::Reg reg); + [[nodiscard]] IR::U64 L(IR::Reg reg); [[nodiscard]] IR::F32 F(IR::Reg reg); [[nodiscard]] IR::F64 D(IR::Reg reg); void X(IR::Reg dest_reg, const IR::U32& value); + void L(IR::Reg dest_reg, const IR::U64& value); void F(IR::Reg dest_reg, const IR::F32& value); void D(IR::Reg dest_reg, const IR::F64& value); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index db5138e4d..32f276f3b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -105,6 +105,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPNeg16: case IR::Opcode::FPRoundEven16: case IR::Opcode::FPSaturate16: + case IR::Opcode::FPClamp16: case IR::Opcode::FPTrunc16: case IR::Opcode::FPOrdEqual16: case IR::Opcode::FPUnordEqual16: @@ -148,6 +149,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::FPRecipSqrt64: case IR::Opcode::FPRoundEven64: case IR::Opcode::FPSaturate64: + case IR::Opcode::FPClamp64: case IR::Opcode::FPTrunc64: case IR::Opcode::FPOrdEqual64: case IR::Opcode::FPUnordEqual64: diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 0e8862f45..0d2c91ed6 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -30,6 +30,8 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::FPRoundEven32; case IR::Opcode::FPSaturate16: return IR::Opcode::FPSaturate32; + case IR::Opcode::FPClamp16: + return IR::Opcode::FPClamp32; case IR::Opcode::FPTrunc16: return IR::Opcode::FPTrunc32; case IR::Opcode::CompositeConstructF16x2: -- cgit v1.2.3 From 5b8afed87115c82cb48913fd47dfbfa347e4faa5 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 18 Apr 2021 20:47:31 -0400 Subject: spirv: Replace Constant/ConstantComposite with Const helper --- .../backend/spirv/emit_context.cpp | 69 +++++++++++----------- src/shader_recompiler/backend/spirv/emit_context.h | 2 +- .../backend/spirv/emit_spirv_atomic.cpp | 16 ++--- .../backend/spirv/emit_spirv_barriers.cpp | 9 ++- .../backend/spirv/emit_spirv_context_get_set.cpp | 38 ++++++------ .../backend/spirv/emit_spirv_floating_point.cpp | 6 +- .../backend/spirv/emit_spirv_image.cpp | 23 +++----- .../backend/spirv/emit_spirv_integer.cpp | 2 +- .../backend/spirv/emit_spirv_memory.cpp | 4 +- .../backend/spirv/emit_spirv_shared_memory.cpp | 30 +++++----- .../backend/spirv/emit_spirv_special.cpp | 8 +-- .../backend/spirv/emit_spirv_warp.cpp | 6 +- 12 files changed, 101 insertions(+), 112 deletions(-) (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index b9e6d5655..214ef9c25 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -131,13 +131,13 @@ Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, case Stage::TessellationControl: case Stage::TessellationEval: if (per_invocation) { - type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], 32u)); + type = ctx.TypeArray(type, ctx.Const(32u)); } break; case Stage::Geometry: if (per_invocation) { const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; - type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); + type = ctx.TypeArray(type, ctx.Const(num_vertices)); } break; default: @@ -149,7 +149,7 @@ Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, Id DefineOutput(EmitContext& ctx, Id type, std::optional invocations, std::optional builtin = std::nullopt) { if (invocations && ctx.stage == Stage::TessellationControl) { - type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], *invocations)); + type = ctx.TypeArray(type, ctx.Const(*invocations)); } return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); } @@ -224,7 +224,7 @@ std::optional AttrTypes(EmitContext& ctx, u32 index) { void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, u32 binding, Id type, char type_char, u32 element_size) { - const Id array_type{ctx.TypeArray(type, ctx.Constant(ctx.U32[1], 65536U / element_size))}; + const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))}; ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size); const Id struct_type{ctx.TypeStruct(array_type)}; @@ -328,7 +328,7 @@ Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_p const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout}; const Id cas_func{CasFunction(ctx, operation, value_type)}; const Id zero{ctx.u32_zero_value}; - const Id scope_id{ctx.Constant(ctx.U32[1], static_cast(scope))}; + const Id scope_id{ctx.Const(static_cast(scope))}; const Id loop_header{ctx.OpLabel()}; const Id continue_block{ctx.OpLabel()}; @@ -428,11 +428,11 @@ Id EmitContext::Def(const IR::Value& value) { case IR::Type::U1: return value.U1() ? true_value : false_value; case IR::Type::U32: - return Constant(U32[1], value.U32()); + return Const(value.U32()); case IR::Type::U64: return Constant(U64, value.U64()); case IR::Type::F32: - return Constant(F32[1], value.F32()); + return Const(value.F32()); case IR::Type::F64: return Constant(F64[1], value.F64()); case IR::Type::Label: @@ -486,8 +486,8 @@ void EmitContext::DefineCommonTypes(const Info& info) { void EmitContext::DefineCommonConstants() { true_value = ConstantTrue(U1); false_value = ConstantFalse(U1); - u32_zero_value = Constant(U32[1], 0U); - f32_zero_value = Constant(F32[1], 0.0f); + u32_zero_value = Const(0U); + f32_zero_value = Const(0.0f); } void EmitContext::DefineInterfaces(const IR::Program& program) { @@ -500,7 +500,7 @@ void EmitContext::DefineLocalMemory(const IR::Program& program) { return; } const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)}; - const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; + const Id type{TypeArray(U32[1], Const(num_elements))}; const Id pointer{TypePointer(spv::StorageClass::Private, type)}; local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private); if (profile.supported_spirv >= 0x00010400) { @@ -514,7 +514,7 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { } const auto make{[&](Id element_type, u32 element_size) { const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)}; - const Id array_type{TypeArray(element_type, Constant(U32[1], num_elements))}; + const Id array_type{TypeArray(element_type, Const(num_elements))}; Decorate(array_type, spv::Decoration::ArrayStride, element_size); const Id struct_type{TypeStruct(array_type)}; @@ -549,7 +549,7 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { return; } const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; - const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; + const Id type{TypeArray(U32[1], Const(num_elements))}; shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); @@ -569,10 +569,10 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { OpBranch(loop_header); AddLabel(loop_header); - const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; - const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Constant(U32[1], 3U))}; - const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Constant(U32[1], mask))}; - const Id count{Constant(U32[1], size)}; + const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))}; + const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))}; + const Id count{Const(size)}; OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); OpBranch(continue_block); @@ -580,9 +580,8 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; const Id old_value{OpLoad(U32[1], word_pointer)}; const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; - const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Constant(U32[1], 1U), - u32_zero_value, u32_zero_value, new_value, - old_value)}; + const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value, + u32_zero_value, new_value, old_value)}; const Id success{OpIEqual(U1, atomic_res, old_value)}; OpBranchConditional(success, merge_block, loop_header); @@ -623,9 +622,9 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}}; AddLabel(); - const Id base_index{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; - const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))}; - const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Constant(U32[1], 2U))}; + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; if (info.loads_position) { @@ -643,7 +642,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); OpSwitch(compare_index, default_label, literals, labels); AddLabel(default_label); - OpReturnValue(Constant(F32[1], 0.0f)); + OpReturnValue(Const(0.0f)); size_t label_index{0}; if (info.loads_position) { AddLabel(labels[label_index]); @@ -661,7 +660,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(labels[label_index]); const auto type{AttrTypes(*this, static_cast(i))}; if (!type) { - OpReturnValue(Constant(F32[1], 0.0f)); + OpReturnValue(Const(0.0f)); ++label_index; continue; } @@ -688,9 +687,9 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id offset{OpFunctionParameter(U32[1])}; const Id store_value{OpFunctionParameter(F32[1])}; AddLabel(); - const Id base_index{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; - const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))}; - const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Constant(U32[1], 2U))}; + const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; + const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; + const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; if (info.stores_position) { @@ -744,7 +743,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpReturn(); ++label_index; AddLabel(labels[label_index]); - const Id fixed_index{OpIAdd(U32[1], masked_index, Constant(U32[1], 4))}; + const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))}; const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)}; OpStore(pointer2, store_value); OpReturn(); @@ -1018,9 +1017,9 @@ void EmitContext::DefineInputs(const Info& info) { DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId); } if (info.uses_fswzadd) { - const Id f32_one{Constant(F32[1], 1.0f)}; - const Id f32_minus_one{Constant(F32[1], -1.0f)}; - const Id f32_zero{Constant(F32[1], 0.0f)}; + const Id f32_one{Const(1.0f)}; + const Id f32_minus_one{Const(-1.0f)}; + const Id f32_zero{Const(0.0f)}; fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero); fswzadd_lut_b = ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); @@ -1118,7 +1117,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } - const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; + const Id type{TypeArray(F32[1], Const(8U))}; clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } if (info.stores_layer && @@ -1136,7 +1135,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } if (info.stores_viewport_mask && profile.support_viewport_mask) { - viewport_mask = DefineOutput(*this, TypeArray(U32[1], Constant(U32[1], 1u)), std::nullopt); + viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt); } for (size_t index = 0; index < info.stores_generics.size(); ++index) { if (info.stores_generics[index]) { @@ -1146,13 +1145,13 @@ void EmitContext::DefineOutputs(const IR::Program& program) { switch (stage) { case Stage::TessellationControl: if (info.stores_tess_level_outer) { - const Id type{TypeArray(F32[1], Constant(U32[1], 4))}; + const Id type{TypeArray(F32[1], Const(4U))}; output_tess_level_outer = DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter); Decorate(output_tess_level_outer, spv::Decoration::Patch); } if (info.stores_tess_level_inner) { - const Id type{TypeArray(F32[1], Constant(U32[1], 2))}; + const Id type{TypeArray(F32[1], Const(2U))}; output_tess_level_inner = DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner); Decorate(output_tess_level_inner, spv::Decoration::Patch); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 7567fdcac..ef8507367 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -114,7 +114,7 @@ public: } Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) { - return ConstantComposite(U32[2], Const(element_1), Const(element_2), Const(element_3), + return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3), Const(element_4)); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index c2c879a6c..6e17d1c7e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -7,10 +7,10 @@ namespace Shader::Backend::SPIRV { namespace { Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; if (index_offset > 0) { - index = ctx.OpIAdd(ctx.U32[1], index, ctx.Constant(ctx.U32[1], index_offset)); + index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); } return ctx.profile.support_explicit_workgroup_layout ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) @@ -20,14 +20,14 @@ Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { if (offset.IsImmediate()) { const u32 imm_offset{static_cast(offset.U32() / element_size)}; - return ctx.Constant(ctx.U32[1], imm_offset); + return ctx.Const(imm_offset); } const u32 shift{static_cast(std::countr_zero(element_size))}; const Id index{ctx.Def(offset)}; if (shift == 0) { return index; } - const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + const Id shift_id{ctx.Const(shift)}; return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } @@ -43,7 +43,7 @@ Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def, } std::pair AtomicArgs(EmitContext& ctx) { - const Id scope{ctx.Constant(ctx.U32[1], static_cast(spv::Scope::Device))}; + const Id scope{ctx.Const(static_cast(spv::Scope::Device))}; const Id semantics{ctx.u32_zero_value}; return {scope, semantics}; } @@ -103,13 +103,13 @@ Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { } Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) { - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value); } Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) { - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value); } @@ -132,7 +132,7 @@ Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) { Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) { - const Id shift_id{ctx.Constant(ctx.U32[1], 3U)}; + const Id shift_id{ctx.Const(3U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id pointer{ ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 366dc6a0c..705aebd81 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -12,8 +12,7 @@ void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory}; - ctx.OpMemoryBarrier(ctx.Constant(ctx.U32[1], static_cast(scope)), - ctx.Constant(ctx.U32[1], static_cast(semantics))); + ctx.OpMemoryBarrier(ctx.Const(static_cast(scope)), ctx.Const(static_cast(semantics))); } } // Anonymous namespace @@ -22,9 +21,9 @@ void EmitBarrier(EmitContext& ctx) { const auto memory{spv::Scope::Workgroup}; const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::WorkgroupMemory}; - ctx.OpControlBarrier(ctx.Constant(ctx.U32[1], static_cast(execution)), - ctx.Constant(ctx.U32[1], static_cast(memory)), - ctx.Constant(ctx.U32[1], static_cast(memory_semantics))); + ctx.OpControlBarrier(ctx.Const(static_cast(execution)), + ctx.Const(static_cast(memory)), + ctx.Const(static_cast(memory_semantics))); } void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index ed57e44a2..5cc9d0d39 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -69,7 +69,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { return info.id; } else { const u32 index_element{element - info.first_element}; - const Id index_id{ctx.Constant(ctx.U32[1], index_element)}; + const Id index_id{ctx.Const(index_element)}; return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); } } @@ -81,7 +81,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::PositionZ: case IR::Attribute::PositionW: { const u32 element{static_cast(attr) % 4}; - const Id element_id{ctx.Constant(ctx.U32[1], element)}; + const Id element_id{ctx.Const(element)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); } case IR::Attribute::ClipDistance0: @@ -94,7 +94,7 @@ std::optional OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { case IR::Attribute::ClipDistance7: { const u32 base{static_cast(IR::Attribute::ClipDistance0)}; const u32 index{static_cast(attr) - base}; - const Id clip_num{ctx.Constant(ctx.U32[1], index)}; + const Id clip_num{ctx.Const(index)}; return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); } case IR::Attribute::Layer: @@ -131,7 +131,7 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, Id index{ctx.Def(offset)}; if (element_size > 1) { const u32 log2_element_size{static_cast(std::countr_zero(element_size))}; - const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; + const Id shift{ctx.Const(log2_element_size)}; index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); } const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; @@ -140,7 +140,7 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, if (offset.U32() % element_size != 0) { throw NotImplementedException("Unaligned immediate constant buffer load"); } - const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; + const Id imm_offset{ctx.Const(offset.U32() / element_size)}; const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; return ctx.OpLoad(result_type, access_chain); } @@ -212,13 +212,13 @@ Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const u32 element{static_cast(attr) % 4}; - const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; + const auto element_id{[&] { return ctx.Const(element); }}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; if (!type) { // Attribute is disabled - return ctx.Constant(ctx.F32[1], 0.0f); + return ctx.Const(0.0f); } const Id generic_id{ctx.input_generics.at(index)}; const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, element_id())}; @@ -252,20 +252,19 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { } case IR::Attribute::FrontFace: return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), - ctx.Constant(ctx.U32[1], std::numeric_limits::max()), - ctx.u32_zero_value); + ctx.Const(std::numeric_limits::max()), ctx.u32_zero_value); case IR::Attribute::PointSpriteS: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); case IR::Attribute::PointSpriteT: - return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, - ctx.Constant(ctx.U32[1], 1U))); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U))); case IR::Attribute::TessellationEvaluationPointU: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); case IR::Attribute::TessellationEvaluationPointV: - return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, - ctx.Constant(ctx.U32[1], 1U))); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U))); default: throw NotImplementedException("Read attribute {}", attr); @@ -303,7 +302,7 @@ Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { throw NotImplementedException("Non-generic patch load"); } const u32 index{IR::GenericPatchIndex(patch)}; - const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))}; + const Id element{ctx.Const(IR::GenericPatchElement(patch))}; const Id pointer{ctx.OpAccessChain(ctx.input_f32, ctx.patches.at(index), element)}; return ctx.OpLoad(ctx.F32[1], pointer); } @@ -312,7 +311,7 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { const Id pointer{[&] { if (IR::IsGeneric(patch)) { const u32 index{IR::GenericPatchIndex(patch)}; - const Id element{ctx.Constant(ctx.U32[1], IR::GenericPatchElement(patch))}; + const Id element{ctx.Const(IR::GenericPatchElement(patch))}; return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); } switch (patch) { @@ -321,15 +320,14 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { case IR::Patch::TessellationLodTop: case IR::Patch::TessellationLodBottom: { const u32 index{static_cast(patch) - u32(IR::Patch::TessellationLodLeft)}; - const Id index_id{ctx.Constant(ctx.U32[1], index)}; + const Id index_id{ctx.Const(index)}; return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); } case IR::Patch::TessellationLodInteriorU: return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.u32_zero_value); case IR::Patch::TessellationLodInteriorV: - return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, - ctx.Constant(ctx.U32[1], 1u)); + return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.Const(1u)); default: throw NotImplementedException("Patch {}", patch); } @@ -338,7 +336,7 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { } void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { - const Id component_id{ctx.Constant(ctx.U32[1], component)}; + const Id component_id{ctx.Const(component)}; const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; ctx.OpStore(pointer, value); } @@ -404,7 +402,7 @@ Id EmitIsHelperInvocation(EmitContext& ctx) { } Id EmitYDirection(EmitContext& ctx) { - return ctx.Constant(ctx.F32[1], ctx.profile.y_negate ? -1.0f : 1.0f); + return ctx.Const(ctx.profile.y_negate ? -1.0f : 1.0f); } Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 24300af39..97d11cc63 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -117,7 +117,7 @@ Id EmitFPLog2(EmitContext& ctx, Id value) { } Id EmitFPRecip32(EmitContext& ctx, Id value) { - return ctx.OpFDiv(ctx.F32[1], ctx.Constant(ctx.F32[1], 1.0f), value); + return ctx.OpFDiv(ctx.F32[1], ctx.Const(1.0f), value); } Id EmitFPRecip64(EmitContext& ctx, Id value) { @@ -143,8 +143,8 @@ Id EmitFPSaturate16(EmitContext& ctx, Id value) { } Id EmitFPSaturate32(EmitContext& ctx, Id value) { - const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; - const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; + const Id zero{ctx.Const(f32{0.0})}; + const Id one{ctx.Const(f32{1.0})}; return Clamp(ctx, ctx.F32[1], value, zero, one); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 7a4388e7e..90817f161 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -45,16 +45,12 @@ public: if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { throw LogicError("Invalid PTP arguments"); } - auto read{[&](unsigned int a, unsigned int b) { - return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); - }}; - - const Id offsets{ - ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)), - ctx.ConstantComposite(ctx.U32[2], read(0, 0), read(0, 1)), - ctx.ConstantComposite(ctx.U32[2], read(0, 2), read(0, 3)), - ctx.ConstantComposite(ctx.U32[2], read(1, 0), read(1, 1)), - ctx.ConstantComposite(ctx.U32[2], read(1, 2), read(1, 3)))}; + auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; + + const Id offsets{ctx.ConstantComposite( + ctx.TypeArray(ctx.U32[2], ctx.Const(4U)), ctx.Const(read(0, 0), read(0, 1)), + ctx.Const(read(0, 2), read(0, 3)), ctx.Const(read(1, 0), read(1, 1)), + ctx.Const(read(1, 2), read(1, 3)))}; Add(spv::ImageOperandsMask::ConstOffsets, offsets); } @@ -108,7 +104,7 @@ private: return; } if (offset.IsImmediate()) { - Add(spv::ImageOperandsMask::ConstOffset, ctx.Constant(ctx.U32[1], offset.U32())); + Add(spv::ImageOperandsMask::ConstOffset, ctx.Const(offset.U32())); return; } IR::Inst* const inst{offset.InstRecursive()}; @@ -361,9 +357,8 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id const auto info{inst->Flags()}; const ImageOperands operands(ctx, offset, offset2); return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, - ctx.F32[4], Texture(ctx, index), coords, - ctx.Constant(ctx.U32[1], info.gather_component.Value()), operands.Mask(), - operands.Span()); + ctx.F32[4], Texture(ctx, index), coords, ctx.Const(info.gather_component), + operands.Mask(), operands.Span()); } Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 944f1e429..c12d0a513 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -44,7 +44,7 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c constexpr u32 s32_max{static_cast(std::numeric_limits::max())}; const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; - const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Constant(ctx.U32[1], s32_max), a)}; + const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)}; const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)}; const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index a8f2ea5a0..7bf828995 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -11,14 +11,14 @@ namespace { Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { if (offset.IsImmediate()) { const u32 imm_offset{static_cast(offset.U32() / element_size)}; - return ctx.Constant(ctx.U32[1], imm_offset); + return ctx.Const(imm_offset); } const u32 shift{static_cast(std::countr_zero(element_size))}; const Id index{ctx.Def(offset)}; if (shift == 0) { return index; } - const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + const Id shift_id{ctx.Const(shift)}; return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index fa2fc9ab4..710d1cd25 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -7,22 +7,22 @@ namespace Shader::Backend::SPIRV { namespace { Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { - const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; + const Id shift_id{ctx.Const(shift)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); } Id Word(EmitContext& ctx, Id offset) { - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; return ctx.OpLoad(ctx.U32[1], pointer); } std::pair ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { - const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))}; - const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))}; - const Id count_id{ctx.Constant(ctx.U32[1], count)}; + const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Const(3U))}; + const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(mask))}; + const Id count_id{ctx.Const(count)}; return {bit, count_id}; } } // Anonymous namespace @@ -83,9 +83,9 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; return ctx.OpLoad(ctx.U32[2], pointer); } else { - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; - const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))}; + const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))}; const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), @@ -98,12 +98,11 @@ Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; return ctx.OpLoad(ctx.U32[4], pointer); } - const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift_id{ctx.Const(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; std::array values{}; for (u32 i = 0; i < 4; ++i) { - const Id index{i == 0 ? base_index - : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; + const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; values[i] = ctx.OpLoad(ctx.U32[1], pointer); } @@ -134,7 +133,7 @@ void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { if (ctx.profile.support_explicit_workgroup_layout) { pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); } else { - const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift{ctx.Const(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); } @@ -147,9 +146,9 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { ctx.OpStore(pointer, value); return; } - const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift{ctx.Const(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; - const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))}; + const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))}; const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); @@ -162,11 +161,10 @@ void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { ctx.OpStore(pointer, value); return; } - const Id shift{ctx.Constant(ctx.U32[1], 2U)}; + const Id shift{ctx.Const(2U)}; const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; for (u32 i = 0; i < 4; ++i) { - const Id index{i == 0 ? base_index - : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))}; + const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index ae8b39f41..d5430e905 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -19,7 +19,7 @@ void ConvertDepthMode(EmitContext& ctx) { void SetFixedPipelinePointSize(EmitContext& ctx) { if (ctx.profile.fixed_state_point_size) { const float point_size{*ctx.profile.fixed_state_point_size}; - ctx.OpStore(ctx.output_point_size, ctx.Constant(ctx.F32[1], point_size)); + ctx.OpStore(ctx.output_point_size, ctx.Const(point_size)); } } @@ -75,7 +75,7 @@ void AlphaTest(EmitContext& ctx) { const Id true_label{ctx.OpLabel()}; const Id discard_label{ctx.OpLabel()}; - const Id alpha_reference{ctx.Constant(ctx.F32[1], ctx.profile.alpha_test_reference)}; + const Id alpha_reference{ctx.Const(ctx.profile.alpha_test_reference)}; const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone); @@ -88,8 +88,8 @@ void AlphaTest(EmitContext& ctx) { void EmitPrologue(EmitContext& ctx) { if (ctx.stage == Stage::VertexB) { - const Id zero{ctx.Constant(ctx.F32[1], 0.0f)}; - const Id one{ctx.Constant(ctx.F32[1], 1.0f)}; + const Id zero{ctx.Const(0.0f)}; + const Id one{ctx.Const(1.0f)}; const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)}; ctx.OpStore(ctx.output_position, default_vector); for (const auto& info : ctx.output_generics) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index a255f9ba7..239e2ecab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -54,7 +54,7 @@ Id EmitLaneId(EmitContext& ctx) { if (!ctx.profile.warp_size_potentially_larger_than_guest) { return id; } - return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Constant(ctx.U32[1], 31U)); + return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U)); } Id EmitVoteAll(EmitContext& ctx, Id pred) { @@ -168,10 +168,10 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id } Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { - const Id three{ctx.Constant(ctx.U32[1], 3)}; + const Id three{ctx.Const(3U)}; Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); - mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Constant(ctx.U32[1], 1)); + mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U)); mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask); mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); -- cgit v1.2.3 From 850b08a16cecf260e7c8e07b81b5e0078622974d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:27:09 -0300 Subject: spirv: Be aware of NAN unaware drivers --- .../backend/spirv/emit_spirv_floating_point.cpp | 58 +++++++++++++++------- 1 file changed, 40 insertions(+), 18 deletions(-) (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 97d11cc63..b3afbef25 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -22,6 +22,28 @@ Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) { return ctx.OpFClamp(type, value, zero, one); } } + +Id FPOrdNotEqual(EmitContext& ctx, Id lhs, Id rhs) { + if (ctx.profile.ignore_nan_fp_comparisons) { + const Id comp{ctx.OpFOrdEqual(ctx.U1, lhs, rhs)}; + const Id lhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, lhs))}; + const Id rhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, rhs))}; + return ctx.OpLogicalAnd(ctx.U1, ctx.OpLogicalAnd(ctx.U1, comp, lhs_not_nan), rhs_not_nan); + } else { + return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); + } +} + +Id FPUnordCompare(Id (EmitContext::*comp_func)(Id, Id, Id), EmitContext& ctx, Id lhs, Id rhs) { + if (ctx.profile.ignore_nan_fp_comparisons) { + const Id lhs_nan{ctx.OpIsNan(ctx.U1, lhs)}; + const Id rhs_nan{ctx.OpIsNan(ctx.U1, rhs)}; + const Id comp{(ctx.*comp_func)(ctx.U1, lhs, rhs)}; + return ctx.OpLogicalOr(ctx.U1, ctx.OpLogicalOr(ctx.U1, comp, lhs_nan), rhs_nan); + } else { + return (ctx.*comp_func)(ctx.U1, lhs, rhs); + } +} } // Anonymous namespace Id EmitFPAbs16(EmitContext& ctx, Id value) { @@ -227,27 +249,27 @@ Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) { } Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); } Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); } Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); } Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); + return FPOrdNotEqual(ctx, lhs, rhs); } Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); + return FPOrdNotEqual(ctx, lhs, rhs); } Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); + return FPOrdNotEqual(ctx, lhs, rhs); } Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { @@ -275,15 +297,15 @@ Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) { } Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); } Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); } Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); } Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { @@ -299,15 +321,15 @@ Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { } Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); } Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); } Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); } Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { @@ -323,15 +345,15 @@ Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { } Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); } Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); } Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); } Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { @@ -347,15 +369,15 @@ Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { } Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); } Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); } Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { - return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs); + return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); } Id EmitFPIsNan16(EmitContext& ctx, Id value) { -- cgit v1.2.3 From bed090807afd3364ed6ef18a031a0ffd95a1b89b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 3 May 2021 20:53:00 -0300 Subject: Move SPIR-V emission functions to their own header --- src/shader_recompiler/CMakeLists.txt | 2 + src/shader_recompiler/backend/bindings.h | 19 + src/shader_recompiler/backend/spirv/emit_context.h | 9 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 571 +------------------- .../backend/spirv/emit_spirv_atomic.cpp | 1 + .../backend/spirv/emit_spirv_barriers.cpp | 1 + .../spirv/emit_spirv_bitwise_conversion.cpp | 1 + .../backend/spirv/emit_spirv_composite.cpp | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 1 + .../backend/spirv/emit_spirv_control_flow.cpp | 1 + .../backend/spirv/emit_spirv_convert.cpp | 1 + .../backend/spirv/emit_spirv_floating_point.cpp | 1 + .../backend/spirv/emit_spirv_image.cpp | 1 + .../backend/spirv/emit_spirv_image_atomic.cpp | 1 + .../backend/spirv/emit_spirv_instructions.h | 583 +++++++++++++++++++++ .../backend/spirv/emit_spirv_integer.cpp | 1 + .../backend/spirv/emit_spirv_logical.cpp | 1 + .../backend/spirv/emit_spirv_memory.cpp | 1 + .../backend/spirv/emit_spirv_select.cpp | 1 + .../backend/spirv/emit_spirv_shared_memory.cpp | 1 + .../backend/spirv/emit_spirv_special.cpp | 1 + .../backend/spirv/emit_spirv_undefined.cpp | 1 + .../backend/spirv/emit_spirv_warp.cpp | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 5 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 8 +- 26 files changed, 637 insertions(+), 579 deletions(-) create mode 100644 src/shader_recompiler/backend/bindings.h create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_instructions.h (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 0bcd714d6..6523615aa 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(shader_recompiler STATIC + backend/bindings.h backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp @@ -13,6 +14,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_floating_point.cpp backend/spirv/emit_spirv_image.cpp backend/spirv/emit_spirv_image_atomic.cpp + backend/spirv/emit_spirv_instructions.h backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h new file mode 100644 index 000000000..35503000c --- /dev/null +++ b/src/shader_recompiler/backend/bindings.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Shader::Backend { + +struct Bindings { + u32 unified{}; + u32 uniform_buffer{}; + u32 storage_buffer{}; + u32 texture{}; + u32 image{}; +}; + +} // namespace Shader::Backend diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 30b08104d..8b000f1ec 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -9,6 +9,7 @@ #include +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" #include "shader_recompiler/shader_info.h" @@ -17,14 +18,6 @@ namespace Shader::Backend::SPIRV { using Sirit::Id; -struct Bindings { - u32 unified{}; - u32 uniform_buffer{}; - u32 storage_buffer{}; - u32 texture{}; - u32 image{}; -}; - class VectorTypes { public: void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3f9adc902..0681dfd16 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -9,6 +9,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/program.h" diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 47d62b190..d8ab2d8ed 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -4,9 +4,12 @@ #pragma once +#include + #include #include "common/common_types.h" +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/spirv/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" @@ -16,569 +19,9 @@ namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& binding); -// Microinstruction emitters -Id EmitPhi(EmitContext& ctx, IR::Inst* inst); -void EmitVoid(EmitContext& ctx); -Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranch(EmitContext& ctx, Id label); -void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); -void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); -void EmitSelectionMerge(EmitContext& ctx, Id merge_label); -void EmitReturn(EmitContext& ctx); -void EmitJoin(EmitContext& ctx); -void EmitUnreachable(EmitContext& ctx); -void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); -void EmitBarrier(EmitContext& ctx); -void EmitWorkgroupMemoryBarrier(EmitContext& ctx); -void EmitDeviceMemoryBarrier(EmitContext& ctx); -void EmitPrologue(EmitContext& ctx); -void EmitEpilogue(EmitContext& ctx); -void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); -void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); -void EmitGetRegister(EmitContext& ctx); -void EmitSetRegister(EmitContext& ctx); -void EmitGetPred(EmitContext& ctx); -void EmitSetPred(EmitContext& ctx); -void EmitSetGotoVariable(EmitContext& ctx); -void EmitGetGotoVariable(EmitContext& ctx); -void EmitSetIndirectBranchVariable(EmitContext& ctx); -void EmitGetIndirectBranchVariable(EmitContext& ctx); -Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); -Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); -void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); -Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); -void EmitSetSampleMask(EmitContext& ctx, Id value); -void EmitSetFragDepth(EmitContext& ctx, Id value); -void EmitGetZFlag(EmitContext& ctx); -void EmitGetSFlag(EmitContext& ctx); -void EmitGetCFlag(EmitContext& ctx); -void EmitGetOFlag(EmitContext& ctx); -void EmitSetZFlag(EmitContext& ctx); -void EmitSetSFlag(EmitContext& ctx); -void EmitSetCFlag(EmitContext& ctx); -void EmitSetOFlag(EmitContext& ctx); -Id EmitWorkgroupId(EmitContext& ctx); -Id EmitLocalInvocationId(EmitContext& ctx); -Id EmitInvocationId(EmitContext& ctx); -Id EmitSampleId(EmitContext& ctx); -Id EmitIsHelperInvocation(EmitContext& ctx); -Id EmitYDirection(EmitContext& ctx); -Id EmitLoadLocal(EmitContext& ctx, Id word_offset); -void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); -Id EmitUndefU1(EmitContext& ctx); -Id EmitUndefU8(EmitContext& ctx); -Id EmitUndefU16(EmitContext& ctx); -Id EmitUndefU32(EmitContext& ctx); -Id EmitUndefU64(EmitContext& ctx); -void EmitLoadGlobalU8(EmitContext& ctx); -void EmitLoadGlobalS8(EmitContext& ctx); -void EmitLoadGlobalU16(EmitContext& ctx); -void EmitLoadGlobalS16(EmitContext& ctx); -Id EmitLoadGlobal32(EmitContext& ctx, Id address); -Id EmitLoadGlobal64(EmitContext& ctx, Id address); -Id EmitLoadGlobal128(EmitContext& ctx, Id address); -void EmitWriteGlobalU8(EmitContext& ctx); -void EmitWriteGlobalS8(EmitContext& ctx); -void EmitWriteGlobalU16(EmitContext& ctx); -void EmitWriteGlobalS16(EmitContext& ctx); -void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); -void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); -void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); -Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitLoadSharedU8(EmitContext& ctx, Id offset); -Id EmitLoadSharedS8(EmitContext& ctx, Id offset); -Id EmitLoadSharedU16(EmitContext& ctx, Id offset); -Id EmitLoadSharedS16(EmitContext& ctx, Id offset); -Id EmitLoadSharedU32(EmitContext& ctx, Id offset); -Id EmitLoadSharedU64(EmitContext& ctx, Id offset); -Id EmitLoadSharedU128(EmitContext& ctx, Id offset); -void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); -void EmitCompositeConstructF64x2(EmitContext& ctx); -void EmitCompositeConstructF64x3(EmitContext& ctx); -void EmitCompositeConstructF64x4(EmitContext& ctx); -void EmitCompositeExtractF64x2(EmitContext& ctx); -void EmitCompositeExtractF64x3(EmitContext& ctx); -void EmitCompositeExtractF64x4(EmitContext& ctx); -Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -void EmitBitCastU16F16(EmitContext& ctx); -Id EmitBitCastU32F32(EmitContext& ctx, Id value); -void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastF16U16(EmitContext& ctx); -Id EmitBitCastF32U32(EmitContext& ctx, Id value); -void EmitBitCastF64U64(EmitContext& ctx); -Id EmitPackUint2x32(EmitContext& ctx, Id value); -Id EmitUnpackUint2x32(EmitContext& ctx, Id value); -Id EmitPackFloat2x16(EmitContext& ctx, Id value); -Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); -Id EmitPackHalf2x16(EmitContext& ctx, Id value); -Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); -Id EmitPackDouble2x32(EmitContext& ctx, Id value); -Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); -void EmitGetZeroFromOp(EmitContext& ctx); -void EmitGetSignFromOp(EmitContext& ctx); -void EmitGetCarryFromOp(EmitContext& ctx); -void EmitGetOverflowFromOp(EmitContext& ctx); -void EmitGetSparseFromOp(EmitContext& ctx); -void EmitGetInBoundsFromOp(EmitContext& ctx); -Id EmitFPAbs16(EmitContext& ctx, Id value); -Id EmitFPAbs32(EmitContext& ctx, Id value); -Id EmitFPAbs64(EmitContext& ctx, Id value); -Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPMax32(EmitContext& ctx, Id a, Id b); -Id EmitFPMax64(EmitContext& ctx, Id a, Id b); -Id EmitFPMin32(EmitContext& ctx, Id a, Id b); -Id EmitFPMin64(EmitContext& ctx, Id a, Id b); -Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPNeg16(EmitContext& ctx, Id value); -Id EmitFPNeg32(EmitContext& ctx, Id value); -Id EmitFPNeg64(EmitContext& ctx, Id value); -Id EmitFPSin(EmitContext& ctx, Id value); -Id EmitFPCos(EmitContext& ctx, Id value); -Id EmitFPExp2(EmitContext& ctx, Id value); -Id EmitFPLog2(EmitContext& ctx, Id value); -Id EmitFPRecip32(EmitContext& ctx, Id value); -Id EmitFPRecip64(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); -Id EmitFPSqrt(EmitContext& ctx, Id value); -Id EmitFPSaturate16(EmitContext& ctx, Id value); -Id EmitFPSaturate32(EmitContext& ctx, Id value); -Id EmitFPSaturate64(EmitContext& ctx, Id value); -Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPRoundEven16(EmitContext& ctx, Id value); -Id EmitFPRoundEven32(EmitContext& ctx, Id value); -Id EmitFPRoundEven64(EmitContext& ctx, Id value); -Id EmitFPFloor16(EmitContext& ctx, Id value); -Id EmitFPFloor32(EmitContext& ctx, Id value); -Id EmitFPFloor64(EmitContext& ctx, Id value); -Id EmitFPCeil16(EmitContext& ctx, Id value); -Id EmitFPCeil32(EmitContext& ctx, Id value); -Id EmitFPCeil64(EmitContext& ctx, Id value); -Id EmitFPTrunc16(EmitContext& ctx, Id value); -Id EmitFPTrunc32(EmitContext& ctx, Id value); -Id EmitFPTrunc64(EmitContext& ctx, Id value); -Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPIsNan16(EmitContext& ctx, Id value); -Id EmitFPIsNan32(EmitContext& ctx, Id value); -Id EmitFPIsNan64(EmitContext& ctx, Id value); -Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitIAdd64(EmitContext& ctx, Id a, Id b); -Id EmitISub32(EmitContext& ctx, Id a, Id b); -Id EmitISub64(EmitContext& ctx, Id a, Id b); -Id EmitIMul32(EmitContext& ctx, Id a, Id b); -Id EmitINeg32(EmitContext& ctx, Id value); -Id EmitINeg64(EmitContext& ctx, Id value); -Id EmitIAbs32(EmitContext& ctx, Id value); -Id EmitIAbs64(EmitContext& ctx, Id value); -Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); -Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); -Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitReverse32(EmitContext& ctx, Id value); -Id EmitBitCount32(EmitContext& ctx, Id value); -Id EmitBitwiseNot32(EmitContext& ctx, Id value); -Id EmitFindSMsb32(EmitContext& ctx, Id value); -Id EmitFindUMsb32(EmitContext& ctx, Id value); -Id EmitSMin32(EmitContext& ctx, Id a, Id b); -Id EmitUMin32(EmitContext& ctx, Id a, Id b); -Id EmitSMax32(EmitContext& ctx, Id a, Id b); -Id EmitUMax32(EmitContext& ctx, Id a, Id b); -Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitGlobalAtomicIAdd32(EmitContext& ctx); -Id EmitGlobalAtomicSMin32(EmitContext& ctx); -Id EmitGlobalAtomicUMin32(EmitContext& ctx); -Id EmitGlobalAtomicSMax32(EmitContext& ctx); -Id EmitGlobalAtomicUMax32(EmitContext& ctx); -Id EmitGlobalAtomicInc32(EmitContext& ctx); -Id EmitGlobalAtomicDec32(EmitContext& ctx); -Id EmitGlobalAtomicAnd32(EmitContext& ctx); -Id EmitGlobalAtomicOr32(EmitContext& ctx); -Id EmitGlobalAtomicXor32(EmitContext& ctx); -Id EmitGlobalAtomicExchange32(EmitContext& ctx); -Id EmitGlobalAtomicIAdd64(EmitContext& ctx); -Id EmitGlobalAtomicSMin64(EmitContext& ctx); -Id EmitGlobalAtomicUMin64(EmitContext& ctx); -Id EmitGlobalAtomicSMax64(EmitContext& ctx); -Id EmitGlobalAtomicUMax64(EmitContext& ctx); -Id EmitGlobalAtomicInc64(EmitContext& ctx); -Id EmitGlobalAtomicDec64(EmitContext& ctx); -Id EmitGlobalAtomicAnd64(EmitContext& ctx); -Id EmitGlobalAtomicOr64(EmitContext& ctx); -Id EmitGlobalAtomicXor64(EmitContext& ctx); -Id EmitGlobalAtomicExchange64(EmitContext& ctx); -Id EmitGlobalAtomicAddF32(EmitContext& ctx); -Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); -Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); -Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); -Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); -Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); -Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); -Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); -Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); -Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); -Id EmitLogicalNot(EmitContext& ctx, Id value); -Id EmitConvertS16F16(EmitContext& ctx, Id value); -Id EmitConvertS16F32(EmitContext& ctx, Id value); -Id EmitConvertS16F64(EmitContext& ctx, Id value); -Id EmitConvertS32F16(EmitContext& ctx, Id value); -Id EmitConvertS32F32(EmitContext& ctx, Id value); -Id EmitConvertS32F64(EmitContext& ctx, Id value); -Id EmitConvertS64F16(EmitContext& ctx, Id value); -Id EmitConvertS64F32(EmitContext& ctx, Id value); -Id EmitConvertS64F64(EmitContext& ctx, Id value); -Id EmitConvertU16F16(EmitContext& ctx, Id value); -Id EmitConvertU16F32(EmitContext& ctx, Id value); -Id EmitConvertU16F64(EmitContext& ctx, Id value); -Id EmitConvertU32F16(EmitContext& ctx, Id value); -Id EmitConvertU32F32(EmitContext& ctx, Id value); -Id EmitConvertU32F64(EmitContext& ctx, Id value); -Id EmitConvertU64F16(EmitContext& ctx, Id value); -Id EmitConvertU64F32(EmitContext& ctx, Id value); -Id EmitConvertU64F64(EmitContext& ctx, Id value); -Id EmitConvertU64U32(EmitContext& ctx, Id value); -Id EmitConvertU32U64(EmitContext& ctx, Id value); -Id EmitConvertF16F32(EmitContext& ctx, Id value); -Id EmitConvertF32F16(EmitContext& ctx, Id value); -Id EmitConvertF32F64(EmitContext& ctx, Id value); -Id EmitConvertF64F32(EmitContext& ctx, Id value); -Id EmitConvertF16S8(EmitContext& ctx, Id value); -Id EmitConvertF16S16(EmitContext& ctx, Id value); -Id EmitConvertF16S32(EmitContext& ctx, Id value); -Id EmitConvertF16S64(EmitContext& ctx, Id value); -Id EmitConvertF16U8(EmitContext& ctx, Id value); -Id EmitConvertF16U16(EmitContext& ctx, Id value); -Id EmitConvertF16U32(EmitContext& ctx, Id value); -Id EmitConvertF16U64(EmitContext& ctx, Id value); -Id EmitConvertF32S8(EmitContext& ctx, Id value); -Id EmitConvertF32S16(EmitContext& ctx, Id value); -Id EmitConvertF32S32(EmitContext& ctx, Id value); -Id EmitConvertF32S64(EmitContext& ctx, Id value); -Id EmitConvertF32U8(EmitContext& ctx, Id value); -Id EmitConvertF32U16(EmitContext& ctx, Id value); -Id EmitConvertF32U32(EmitContext& ctx, Id value); -Id EmitConvertF32U64(EmitContext& ctx, Id value); -Id EmitConvertF64S8(EmitContext& ctx, Id value); -Id EmitConvertF64S16(EmitContext& ctx, Id value); -Id EmitConvertF64S32(EmitContext& ctx, Id value); -Id EmitConvertF64S64(EmitContext& ctx, Id value); -Id EmitConvertF64U8(EmitContext& ctx, Id value); -Id EmitConvertF64U16(EmitContext& ctx, Id value); -Id EmitConvertF64U32(EmitContext& ctx, Id value); -Id EmitConvertF64U64(EmitContext& ctx, Id value); -Id EmitBindlessImageSampleImplicitLod(EmitContext&); -Id EmitBindlessImageSampleExplicitLod(EmitContext&); -Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); -Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); -Id EmitBindlessImageGather(EmitContext&); -Id EmitBindlessImageGatherDref(EmitContext&); -Id EmitBindlessImageFetch(EmitContext&); -Id EmitBindlessImageQueryDimensions(EmitContext&); -Id EmitBindlessImageQueryLod(EmitContext&); -Id EmitBindlessImageGradient(EmitContext&); -Id EmitBindlessImageRead(EmitContext&); -Id EmitBindlessImageWrite(EmitContext&); -Id EmitBoundImageSampleImplicitLod(EmitContext&); -Id EmitBoundImageSampleExplicitLod(EmitContext&); -Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); -Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); -Id EmitBoundImageGather(EmitContext&); -Id EmitBoundImageGatherDref(EmitContext&); -Id EmitBoundImageFetch(EmitContext&); -Id EmitBoundImageQueryDimensions(EmitContext&); -Id EmitBoundImageQueryLod(EmitContext&); -Id EmitBoundImageGradient(EmitContext&); -Id EmitBoundImageRead(EmitContext&); -Id EmitBoundImageWrite(EmitContext&); -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id bias_lc, const IR::Value& offset); -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id lod_lc, const IR::Value& offset); -Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id bias_lc, const IR::Value& offset); -Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id lod_lc, const IR::Value& offset); -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - const IR::Value& offset, const IR::Value& offset2); -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - const IR::Value& offset, const IR::Value& offset2, Id dref); -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, - Id lod, Id ms); -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); -Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id derivates, Id offset, Id lod_clamp); -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); -Id EmitBindlessImageAtomicIAdd32(EmitContext&); -Id EmitBindlessImageAtomicSMin32(EmitContext&); -Id EmitBindlessImageAtomicUMin32(EmitContext&); -Id EmitBindlessImageAtomicSMax32(EmitContext&); -Id EmitBindlessImageAtomicUMax32(EmitContext&); -Id EmitBindlessImageAtomicInc32(EmitContext&); -Id EmitBindlessImageAtomicDec32(EmitContext&); -Id EmitBindlessImageAtomicAnd32(EmitContext&); -Id EmitBindlessImageAtomicOr32(EmitContext&); -Id EmitBindlessImageAtomicXor32(EmitContext&); -Id EmitBindlessImageAtomicExchange32(EmitContext&); -Id EmitBoundImageAtomicIAdd32(EmitContext&); -Id EmitBoundImageAtomicSMin32(EmitContext&); -Id EmitBoundImageAtomicUMin32(EmitContext&); -Id EmitBoundImageAtomicSMax32(EmitContext&); -Id EmitBoundImageAtomicUMax32(EmitContext&); -Id EmitBoundImageAtomicInc32(EmitContext&); -Id EmitBoundImageAtomicDec32(EmitContext&); -Id EmitBoundImageAtomicAnd32(EmitContext&); -Id EmitBoundImageAtomicOr32(EmitContext&); -Id EmitBoundImageAtomicXor32(EmitContext&); -Id EmitBoundImageAtomicExchange32(EmitContext&); -Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitLaneId(EmitContext& ctx); -Id EmitVoteAll(EmitContext& ctx, Id pred); -Id EmitVoteAny(EmitContext& ctx, Id pred); -Id EmitVoteEqual(EmitContext& ctx, Id pred); -Id EmitSubgroupBallot(EmitContext& ctx, Id pred); -Id EmitSubgroupEqMask(EmitContext& ctx); -Id EmitSubgroupLtMask(EmitContext& ctx); -Id EmitSubgroupLeMask(EmitContext& ctx); -Id EmitSubgroupGtMask(EmitContext& ctx); -Id EmitSubgroupGeMask(EmitContext& ctx); -Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); - -Id EmitDPdxFine(EmitContext& ctx, Id op_a); - -Id EmitDPdyFine(EmitContext& ctx, Id op_a); - -Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); - -Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); +[[nodiscard]] inline std::vector EmitSPIRV(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitSPIRV(profile, program, binding); +} } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 6e17d1c7e..053800eb7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 705aebd81..e0b52a001 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 93a45d834..bb11f4f4e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 079e226de..10ff4ecab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index ef32184ea..8e57ff070 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,6 +6,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index b4a6fbb93..6154c46be 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index acb8957fe..fd74e475f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index b3afbef25..61cf25f9c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 6680cf1b3..5832104df 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp index 05bed22b9..d7f1a365a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h new file mode 100644 index 000000000..b5eec3cd1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -0,0 +1,583 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/common_types.h" + +namespace IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace IR + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class EmitContext; + +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitBranch(EmitContext& ctx, Id label); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label); +void EmitReturn(EmitContext& ctx); +void EmitJoin(EmitContext& ctx); +void EmitUnreachable(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetSampleMask(EmitContext& ctx, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitInvocationId(EmitContext& ctx); +Id EmitSampleId(EmitContext& ctx); +Id EmitIsHelperInvocation(EmitContext& ctx); +Id EmitYDirection(EmitContext& ctx); +Id EmitLoadLocal(EmitContext& ctx, Id word_offset); +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +Id EmitLoadGlobal32(EmitContext& ctx, Id address); +Id EmitLoadGlobal64(EmitContext& ctx, Id address); +Id EmitLoadGlobal128(EmitContext& ctx, Id address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitLoadSharedU8(EmitContext& ctx, Id offset); +Id EmitLoadSharedS8(EmitContext& ctx, Id offset); +Id EmitLoadSharedU16(EmitContext& ctx, Id offset); +Id EmitLoadSharedS16(EmitContext& ctx, Id offset); +Id EmitLoadSharedU32(EmitContext& ctx, Id offset); +Id EmitLoadSharedU64(EmitContext& ctx, Id offset); +Id EmitLoadSharedU128(EmitContext& ctx, Id offset); +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& ctx); +Id EmitPackUint2x32(EmitContext& ctx, Id value); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value); +Id EmitPackFloat2x16(EmitContext& ctx, Id value); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); +Id EmitPackHalf2x16(EmitContext& ctx, Id value); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value); +Id EmitFPSin(EmitContext& ctx, Id value); +Id EmitFPCos(EmitContext& ctx, Id value); +Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPLog2(EmitContext& ctx, Id value); +Id EmitFPRecip32(EmitContext& ctx, Id value); +Id EmitFPRecip64(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); +Id EmitFPSqrt(EmitContext& ctx, Id value); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value); +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPRoundEven16(EmitContext& ctx, Id value); +Id EmitFPRoundEven32(EmitContext& ctx, Id value); +Id EmitFPRoundEven64(EmitContext& ctx, Id value); +Id EmitFPFloor16(EmitContext& ctx, Id value); +Id EmitFPFloor32(EmitContext& ctx, Id value); +Id EmitFPFloor64(EmitContext& ctx, Id value); +Id EmitFPCeil16(EmitContext& ctx, Id value); +Id EmitFPCeil32(EmitContext& ctx, Id value); +Id EmitFPCeil64(EmitContext& ctx, Id value); +Id EmitFPTrunc16(EmitContext& ctx, Id value); +Id EmitFPTrunc32(EmitContext& ctx, Id value); +Id EmitFPTrunc64(EmitContext& ctx, Id value); +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan16(EmitContext& ctx, Id value); +Id EmitFPIsNan32(EmitContext& ctx, Id value); +Id EmitFPIsNan64(EmitContext& ctx, Id value); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +Id EmitISub64(EmitContext& ctx, Id a, Id b); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitINeg32(EmitContext& ctx, Id value); +Id EmitINeg64(EmitContext& ctx, Id value); +Id EmitIAbs32(EmitContext& ctx, Id value); +Id EmitIAbs64(EmitContext& ctx, Id value); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitReverse32(EmitContext& ctx, Id value); +Id EmitBitCount32(EmitContext& ctx, Id value); +Id EmitBitwiseNot32(EmitContext& ctx, Id value); +Id EmitFindSMsb32(EmitContext& ctx, Id value); +Id EmitFindUMsb32(EmitContext& ctx, Id value); +Id EmitSMin32(EmitContext& ctx, Id a, Id b); +Id EmitUMin32(EmitContext& ctx, Id a, Id b); +Id EmitSMax32(EmitContext& ctx, Id a, Id b); +Id EmitUMax32(EmitContext& ctx, Id a, Id b); +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitGlobalAtomicIAdd32(EmitContext& ctx); +Id EmitGlobalAtomicSMin32(EmitContext& ctx); +Id EmitGlobalAtomicUMin32(EmitContext& ctx); +Id EmitGlobalAtomicSMax32(EmitContext& ctx); +Id EmitGlobalAtomicUMax32(EmitContext& ctx); +Id EmitGlobalAtomicInc32(EmitContext& ctx); +Id EmitGlobalAtomicDec32(EmitContext& ctx); +Id EmitGlobalAtomicAnd32(EmitContext& ctx); +Id EmitGlobalAtomicOr32(EmitContext& ctx); +Id EmitGlobalAtomicXor32(EmitContext& ctx); +Id EmitGlobalAtomicExchange32(EmitContext& ctx); +Id EmitGlobalAtomicIAdd64(EmitContext& ctx); +Id EmitGlobalAtomicSMin64(EmitContext& ctx); +Id EmitGlobalAtomicUMin64(EmitContext& ctx); +Id EmitGlobalAtomicSMax64(EmitContext& ctx); +Id EmitGlobalAtomicUMax64(EmitContext& ctx); +Id EmitGlobalAtomicInc64(EmitContext& ctx); +Id EmitGlobalAtomicDec64(EmitContext& ctx); +Id EmitGlobalAtomicAnd64(EmitContext& ctx); +Id EmitGlobalAtomicOr64(EmitContext& ctx); +Id EmitGlobalAtomicXor64(EmitContext& ctx); +Id EmitGlobalAtomicExchange64(EmitContext& ctx); +Id EmitGlobalAtomicAddF32(EmitContext& ctx); +Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); +Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); +Id EmitLogicalNot(EmitContext& ctx, Id value); +Id EmitConvertS16F16(EmitContext& ctx, Id value); +Id EmitConvertS16F32(EmitContext& ctx, Id value); +Id EmitConvertS16F64(EmitContext& ctx, Id value); +Id EmitConvertS32F16(EmitContext& ctx, Id value); +Id EmitConvertS32F32(EmitContext& ctx, Id value); +Id EmitConvertS32F64(EmitContext& ctx, Id value); +Id EmitConvertS64F16(EmitContext& ctx, Id value); +Id EmitConvertS64F32(EmitContext& ctx, Id value); +Id EmitConvertS64F64(EmitContext& ctx, Id value); +Id EmitConvertU16F16(EmitContext& ctx, Id value); +Id EmitConvertU16F32(EmitContext& ctx, Id value); +Id EmitConvertU16F64(EmitContext& ctx, Id value); +Id EmitConvertU32F16(EmitContext& ctx, Id value); +Id EmitConvertU32F32(EmitContext& ctx, Id value); +Id EmitConvertU32F64(EmitContext& ctx, Id value); +Id EmitConvertU64F16(EmitContext& ctx, Id value); +Id EmitConvertU64F32(EmitContext& ctx, Id value); +Id EmitConvertU64F64(EmitContext& ctx, Id value); +Id EmitConvertU64U32(EmitContext& ctx, Id value); +Id EmitConvertU32U64(EmitContext& ctx, Id value); +Id EmitConvertF16F32(EmitContext& ctx, Id value); +Id EmitConvertF32F16(EmitContext& ctx, Id value); +Id EmitConvertF32F64(EmitContext& ctx, Id value); +Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S8(EmitContext& ctx, Id value); +Id EmitConvertF16S16(EmitContext& ctx, Id value); +Id EmitConvertF16S32(EmitContext& ctx, Id value); +Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U8(EmitContext& ctx, Id value); +Id EmitConvertF16U16(EmitContext& ctx, Id value); +Id EmitConvertF16U32(EmitContext& ctx, Id value); +Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S8(EmitContext& ctx, Id value); +Id EmitConvertF32S16(EmitContext& ctx, Id value); +Id EmitConvertF32S32(EmitContext& ctx, Id value); +Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U8(EmitContext& ctx, Id value); +Id EmitConvertF32U16(EmitContext& ctx, Id value); +Id EmitConvertF32U32(EmitContext& ctx, Id value); +Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S8(EmitContext& ctx, Id value); +Id EmitConvertF64S16(EmitContext& ctx, Id value); +Id EmitConvertF64S32(EmitContext& ctx, Id value); +Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U8(EmitContext& ctx, Id value); +Id EmitConvertF64U16(EmitContext& ctx, Id value); +Id EmitConvertF64U32(EmitContext& ctx, Id value); +Id EmitConvertF64U64(EmitContext& ctx, Id value); +Id EmitBindlessImageSampleImplicitLod(EmitContext&); +Id EmitBindlessImageSampleExplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +Id EmitBindlessImageGather(EmitContext&); +Id EmitBindlessImageGatherDref(EmitContext&); +Id EmitBindlessImageFetch(EmitContext&); +Id EmitBindlessImageQueryDimensions(EmitContext&); +Id EmitBindlessImageQueryLod(EmitContext&); +Id EmitBindlessImageGradient(EmitContext&); +Id EmitBindlessImageRead(EmitContext&); +Id EmitBindlessImageWrite(EmitContext&); +Id EmitBoundImageSampleImplicitLod(EmitContext&); +Id EmitBoundImageSampleExplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); +Id EmitBoundImageGather(EmitContext&); +Id EmitBoundImageGatherDref(EmitContext&); +Id EmitBoundImageFetch(EmitContext&); +Id EmitBoundImageQueryDimensions(EmitContext&); +Id EmitBoundImageQueryLod(EmitContext&); +Id EmitBoundImageGradient(EmitContext&); +Id EmitBoundImageRead(EmitContext&); +Id EmitBoundImageWrite(EmitContext&); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod_lc, const IR::Value& offset); +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const IR::Value& offset); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod_lc, const IR::Value& offset); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2, Id dref); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + Id lod, Id ms); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); +Id EmitBindlessImageAtomicIAdd32(EmitContext&); +Id EmitBindlessImageAtomicSMin32(EmitContext&); +Id EmitBindlessImageAtomicUMin32(EmitContext&); +Id EmitBindlessImageAtomicSMax32(EmitContext&); +Id EmitBindlessImageAtomicUMax32(EmitContext&); +Id EmitBindlessImageAtomicInc32(EmitContext&); +Id EmitBindlessImageAtomicDec32(EmitContext&); +Id EmitBindlessImageAtomicAnd32(EmitContext&); +Id EmitBindlessImageAtomicOr32(EmitContext&); +Id EmitBindlessImageAtomicXor32(EmitContext&); +Id EmitBindlessImageAtomicExchange32(EmitContext&); +Id EmitBoundImageAtomicIAdd32(EmitContext&); +Id EmitBoundImageAtomicSMin32(EmitContext&); +Id EmitBoundImageAtomicUMin32(EmitContext&); +Id EmitBoundImageAtomicSMax32(EmitContext&); +Id EmitBoundImageAtomicUMax32(EmitContext&); +Id EmitBoundImageAtomicInc32(EmitContext&); +Id EmitBoundImageAtomicDec32(EmitContext&); +Id EmitBoundImageAtomicAnd32(EmitContext&); +Id EmitBoundImageAtomicOr32(EmitContext&); +Id EmitBoundImageAtomicXor32(EmitContext&); +Id EmitBoundImageAtomicExchange32(EmitContext&); +Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitLaneId(EmitContext& ctx); +Id EmitVoteAll(EmitContext& ctx, Id pred); +Id EmitVoteAny(EmitContext& ctx, Id pred); +Id EmitVoteEqual(EmitContext& ctx, Id pred); +Id EmitSubgroupBallot(EmitContext& ctx, Id pred); +Id EmitSubgroupEqMask(EmitContext& ctx); +Id EmitSubgroupLtMask(EmitContext& ctx); +Id EmitSubgroupLeMask(EmitContext& ctx); +Id EmitSubgroupGtMask(EmitContext& ctx); +Id EmitSubgroupGeMask(EmitContext& ctx); +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); +Id EmitDPdxFine(EmitContext& ctx, Id op_a); +Id EmitDPdyFine(EmitContext& ctx, Id op_a); +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 86e6a4f3b..06ab23b1d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index bb434def2..b9a9500fc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index a6a3f3351..37a66095f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 0b45db45e..c5b4f4720 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index 710d1cd25..9a79fc7a2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index d5430e905..ba948f3c9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index 19b06dbe4..c9f469e90 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 239e2ecab..78b1e1ba7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c9ca1f005..6585817bc 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -254,7 +254,7 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( OGLProgram gl_program; gl_program.handle = glCreateProgram(); - Shader::Backend::SPIRV::Bindings binding; + Shader::Backend::Bindings binding; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -297,8 +297,7 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - Shader::Backend::SPIRV::Bindings binding; - const std::vector code{EmitSPIRV(profile, program, binding)}; + const std::vector code{EmitSPIRV(profile, program)}; OGLProgram gl_program; gl_program.handle = glCreateProgram(); AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30b71bdbc..a5edcd072 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,8 +315,9 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; - Shader::Backend::SPIRV::Bindings binding; - for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { + Shader::Backend::Bindings binding; + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -388,8 +389,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - Shader::Backend::SPIRV::Bindings binding; - const std::vector code{EmitSPIRV(base_profile, program, binding)}; + const std::vector code{EmitSPIRV(base_profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { -- cgit v1.2.3