diff options
Diffstat (limited to 'src/video_core/shader')
21 files changed, 656 insertions, 361 deletions
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index fe467608e..a4a0319eb 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -2,13 +2,12 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include <algorithm> -#include <memory> -#include "common/assert.h" +#include <tuple> + #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" #include "video_core/shader/const_buffer_locker.h" namespace VideoCommon::Shader { @@ -103,8 +102,8 @@ bool ConstBufferLocker::IsConsistent() const { } bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { - return keys == rhs.keys && bound_samplers == rhs.bound_samplers && - bindless_samplers == rhs.bindless_samplers; + return std::tie(keys, bound_samplers, bindless_samplers) == + std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 600e2f3c3..d32e2d657 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -4,10 +4,12 @@ #pragma once +#include <optional> #include <unordered_map> #include "common/common_types.h" #include "common/hash.h" #include "video_core/engines/const_buffer_engine_interface.h" +#include "video_core/engines/shader_type.h" namespace VideoCommon::Shader { @@ -20,7 +22,7 @@ using BindlessSamplerMap = * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader * compiler. with it, the shader can obtain required data from GPU state and store it for disk * shader compilation. - **/ + */ class ConstBufferLocker { public: explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index d47c63d9f..b427ac873 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -16,7 +16,9 @@ #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { + namespace { + using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -68,15 +70,15 @@ struct CFGRebuildState { const ProgramCode& program_code; ConstBufferLocker& locker; u32 start{}; - std::vector<BlockInfo> block_info{}; - std::list<u32> inspect_queries{}; - std::list<Query> queries{}; - std::unordered_map<u32, u32> registered{}; - std::set<u32> labels{}; - std::map<u32, u32> ssy_labels{}; - std::map<u32, u32> pbk_labels{}; - std::unordered_map<u32, BlockStack> stacks{}; - ASTManager* manager; + std::vector<BlockInfo> block_info; + std::list<u32> inspect_queries; + std::list<Query> queries; + std::unordered_map<u32, u32> registered; + std::set<u32> labels; + std::map<u32, u32> ssy_labels; + std::map<u32, u32> pbk_labels; + std::unordered_map<u32, BlockStack> stacks; + ASTManager* manager{}; }; enum class BlockCollision : u32 { None, Found, Inside }; @@ -109,7 +111,7 @@ BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { } Pred GetPredicate(u32 index, bool negated) { - return static_cast<Pred>(index + (negated ? 8 : 0)); + return static_cast<Pred>(static_cast<u64>(index) + (negated ? 
8ULL : 0ULL)); } /** @@ -136,15 +138,13 @@ struct BranchIndirectInfo { s32 relative_position{}; }; -std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, - u32 start_address, u32 current_position) { - const u32 shader_start = state.start; - u32 pos = current_position; - BranchIndirectInfo result{}; - u64 track_register = 0; +struct BufferInfo { + u32 index; + u32 offset; +}; - // Step 0 Get BRX Info - const Instruction instr = {state.program_code[pos]}; +std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) { + const Instruction instr = state.program_code[pos]; const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() != OpCode::Id::BRX) { return std::nullopt; @@ -152,86 +152,94 @@ std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& if (instr.brx.constant_buffer != 0) { return std::nullopt; } - track_register = instr.gpr8.Value(); - result.relative_position = instr.brx.GetBranchExtend(); - pos--; - bool found_track = false; + --pos; + return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); +} - // Step 1 Track LDC - while (pos >= shader_start) { - if (IsSchedInstruction(pos, shader_start)) { - pos--; +template <typename Result, typename TestCallable, typename PackCallable> +// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&> +// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&> +std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, + PackCallable pack) { + for (; pos >= state.start; --pos) { + if (IsSchedInstruction(pos, state.start)) { continue; } - const Instruction instr = {state.program_code[pos]}; + const Instruction instr = state.program_code[pos]; const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() == OpCode::Id::LD_C) { - if (instr.gpr0.Value() == track_register && - instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) { - result.buffer = instr.cbuf36.index.Value(); - result.offset = static_cast<u32>(instr.cbuf36.GetOffset()); - track_register = instr.gpr8.Value(); - pos--; - found_track = true; - break; - } + if (!opcode) { + continue; + } + if (test(instr, opcode->get())) { + --pos; + return std::make_optional(pack(instr, opcode->get())); } - pos--; } + return std::nullopt; +} - if (!found_track) { - return std::nullopt; - } - found_track = false; +std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos, + u64 brx_tracked_register) { + return TrackInstruction<std::pair<BufferInfo, u64>>( + state, pos, + [brx_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::LD_C && + instr.gpr0.Value() == brx_tracked_register && + instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; + }, + [](auto instr, const auto& opcode) { + const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()), + static_cast<u32>(instr.cbuf36.GetOffset())}; + return std::make_pair(info, instr.gpr8.Value()); + }); +} - // Step 2 Track SHL - while (pos >= shader_start) { - if (IsSchedInstruction(pos, shader_start)) { - pos--; - continue; - } - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() == OpCode::Id::SHL_IMM) { - if (instr.gpr0.Value() == track_register) { - track_register = instr.gpr8.Value(); - pos--; - found_track = true; - break; - } - } - pos--; +std::optional<u64> TrackSHLRegister(const 
CFGRebuildState& state, u32& pos, + u64 ldc_tracked_register) { + return TrackInstruction<u64>(state, pos, + [ldc_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::SHL_IMM && + instr.gpr0.Value() == ldc_tracked_register; + }, + [](auto instr, const auto&) { return instr.gpr8.Value(); }); +} + +std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, + u64 shl_tracked_register) { + return TrackInstruction<u32>(state, pos, + [shl_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::IMNMX_IMM && + instr.gpr0.Value() == shl_tracked_register; + }, + [](auto instr, const auto&) { + return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); + }); +} + +std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { + const auto brx_info = GetBRXInfo(state, pos); + if (!brx_info) { + return std::nullopt; } + const auto [relative_position, brx_tracked_register] = *brx_info; - if (!found_track) { + const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); + if (!ldc_info) { return std::nullopt; } - found_track = false; + const auto [buffer_info, ldc_tracked_register] = *ldc_info; - // Step 3 Track IMNMX - while (pos >= shader_start) { - if (IsSchedInstruction(pos, shader_start)) { - pos--; - continue; - } - const Instruction instr = state.program_code[pos]; - const auto opcode = OpCode::Decode(instr); - if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { - if (instr.gpr0.Value() == track_register) { - track_register = instr.gpr8.Value(); - result.entries = instr.alu.GetSignedImm20_20() + 1; - pos--; - found_track = true; - break; - } - } - pos--; + const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); + if (!shl_tracked_register) { + return std::nullopt; } - if (!found_track) { + const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); + if (!entries) { return std::nullopt; } - return result; + + return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; } std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { @@ -420,30 +428,30 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) break; } case OpCode::Id::BRX: { - auto tmp = TrackBranchIndirectInfo(state, address, offset); - if (tmp) { - auto result = *tmp; - std::vector<CaseBranch> branches{}; - s32 pc_target = offset + result.relative_position; - for (u32 i = 0; i < result.entries; i++) { - auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4); - if (!k) { - return {ParseResult::AbnormalFlow, parse_info}; - } - u32 value = *k; - u32 target = static_cast<u32>((value >> 3) + pc_target); - insert_label(state, target); - branches.emplace_back(value, target); - } - parse_info.end_address = offset; - parse_info.branch_info = MakeBranchInfo<MultiBranch>( - static_cast<u32>(instr.gpr8.Value()), std::move(branches)); - - return {ParseResult::ControlCaught, parse_info}; - } else { + const auto tmp = TrackBranchIndirectInfo(state, offset); + if (!tmp) { LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); + return {ParseResult::AbnormalFlow, parse_info}; } - return {ParseResult::AbnormalFlow, parse_info}; + + const auto result = *tmp; + const s32 pc_target = offset + result.relative_position; + std::vector<CaseBranch> branches; + for (u32 i = 0; i < result.entries; i++) { + auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); + if (!key) { + return 
{ParseResult::AbnormalFlow, parse_info}; + } + u32 value = *key; + u32 target = static_cast<u32>((value >> 3) + pc_target); + insert_label(state, target); + branches.emplace_back(value, target); + } + parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<MultiBranch>( + static_cast<u32>(instr.gpr8.Value()), std::move(branches)); + + return {ParseResult::ControlCaught, parse_info}; } default: break; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 21fb9cb83..22c3e5120 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -154,10 +154,10 @@ void ShaderIR::Decode() { LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); [[fallthrough]]; case CompileDepth::BruteForce: { + const auto shader_end = static_cast<u32>(program_code.size()); coverage_begin = main_offset; - const std::size_t shader_end = program_code.size(); coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; label++) { + for (u32 label = main_offset; label < shader_end; ++label) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); } break; diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 1473c282a..fcedd2af6 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -43,12 +43,12 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. if (instr.fmul.tab5cb8_2 != 0) { - LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); + LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); } if (instr.fmul.tab5c68_0 != 1) { - LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); + LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", + instr.fmul.tab5c68_0.Value()); } op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); @@ -144,10 +144,11 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::RRO_C: case OpCode::Id::RRO_R: case OpCode::Id::RRO_IMM: { + LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); + // Currently RRO is only implemented as a register move. 
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); SetRegister(bb, instr.gpr0, op_b); - LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); break; } default: diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index b06cbe441..ee7d9a29d 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -21,8 +21,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { if (opcode->get().GetId() == OpCode::Id::HADD2_C || opcode->get().GetId() == OpCode::Id::HADD2_R) { - if (instr.alu_half.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 6466fc011..d179b9873 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -19,12 +19,12 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - if (instr.alu_half_imm.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half_imm.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } else { - if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index a33d242e9..371fae127 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -130,6 +130,25 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, value); break; } + case OpCode::Id::FLO_R: + case OpCode::Id::FLO_C: + case OpCode::Id::FLO_IMM: { + Node value; + if (instr.flo.invert) { + op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); + } + if (instr.flo.is_signed) { + value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); + } else { + value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); + } + if (instr.flo.sh) { + value = + Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); + } + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } case OpCode::Id::SEL_C: case OpCode::Id::SEL_R: case OpCode::Id::SEL_IMM: { diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 32facd6ba..0eeb75559 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -63,12 +63,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { case OpCode::Id::I2F_R: case OpCode::Id::I2F_C: case OpCode::Id::I2F_IMM: { - UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in I2F is not implemented"); - 
Node value = [&]() { + Node value = [&] { switch (opcode->get().GetId()) { case OpCode::Id::I2F_R: return GetRegister(instr.gpr20); @@ -81,7 +80,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { return Immediate(0); } }(); + const bool input_signed = instr.conversion.is_input_signed; + + if (instr.conversion.src_size == Register::Size::Byte) { + const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8; + if (offset > 0) { + value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, + std::move(value), Immediate(offset)); + } + } else { + UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); + } + value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index ca2f39e8d..5973588d6 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -19,10 +19,10 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); if (instr.ffma.tab5980_0 != 1) { - LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); + LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); } if (instr.ffma.tab5980_1 != 0) { - LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); + LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); } const Node op_a = GetRegister(instr.gpr8); diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 48ca7a4af..848e46874 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -20,8 +20,8 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - if (instr.hset2.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.hset2.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index fec8f2dbe..310655619 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -19,7 +19,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - LOG_DEBUG(HW_GPU, "ftz={}", static_cast<u32>(instr.hsetp2.ftz)); + if (instr.hsetp2.ftz != 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 335d78146..c934d0719 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -21,8 +21,10 @@ using Tegra::Shader::OpCode; using Tegra::Shader::Register; namespace { -u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType 
uniform_type) { + +u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { switch (uniform_type) { + case Tegra::Shader::UniformType::UnsignedByte: case Tegra::Shader::UniformType::Single: return 1; case Tegra::Shader::UniformType::Double: @@ -35,6 +37,22 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { return 1; } } + +u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { + switch (uniform_type) { + case Tegra::Shader::UniformType::Single: + return 1; + case Tegra::Shader::UniformType::Double: + return 2; + case Tegra::Shader::UniformType::Quad: + case Tegra::Shader::UniformType::UnsignedQuad: + return 4; + default: + UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); + return 1; + } +} + } // Anonymous namespace u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { @@ -168,7 +186,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const auto [real_address_base, base_address, descriptor] = TrackGlobalMemory(bb, instr, false); - const u32 count = GetUniformTypeElementsCount(type); + const u32 count = GetLdgMemorySize(type); if (!real_address_base || !base_address) { // Tracking failed, load zeroes. for (u32 i = 0; i < count; ++i) { @@ -179,12 +197,22 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); - const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); - const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); + Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + + if (type == Tegra::Shader::UniformType::UnsignedByte) { + // To handle unaligned loads get the byte used to dereferenced global memory + // and extract that byte from the loaded uint32. 
+ Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); + byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); + + gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), + Immediate(8)); + } SetTemporary(bb, i, gmem); } + for (u32 i = 0; i < count; ++i) { SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); } @@ -196,28 +224,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, "Unaligned attribute loads are not supported"); - u64 next_element = instr.attribute.fmt20.element; - auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); + u64 element = instr.attribute.fmt20.element; + auto index = static_cast<u64>(instr.attribute.fmt20.index.Value()); - const auto StoreNextElement = [&](u32 reg_offset) { - const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index), - next_element, GetRegister(instr.gpr39)); + const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; + for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { + Node dest; + if (instr.attribute.fmt20.patch) { + const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element); + dest = MakeNode<PatchNode>(offset); + } else { + dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element, + GetRegister(instr.gpr39)); + } const auto src = GetRegister(instr.gpr0.Value() + reg_offset); bb.push_back(Operation(OperationCode::Assign, dest, src)); - // Load the next attribute element into the following register. If the element - // to load goes beyond the vec4 size, load the first element of the next - // attribute. - next_element = (next_element + 1) % 4; - next_index = next_index + (next_element == 0 ? 1 : 0); - }; - - const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; - for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { - StoreNextElement(reg_offset); + // Load the next attribute element into the following register. If the element to load + // goes beyond the vec4 size, load the first element of the next attribute. + element = (element + 1) % 4; + index = index + (element == 0 ? 
1 : 0); } - break; } case OpCode::Id::ST_L: @@ -274,7 +302,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } - const u32 count = GetUniformTypeElementsCount(type); + const u32 count = GetStgMemorySize(type); for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 116b95f76..7321698b2 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -69,6 +69,8 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { case OpCode::Id::MOV_SYS: { const Node value = [this, instr] { switch (instr.sys20) { + case SystemVariable::InvocationId: + return Operation(OperationCode::InvocationId); case SystemVariable::Ydirection: return Operation(OperationCode::YNegate); case SystemVariable::InvocationInfo: @@ -255,8 +257,14 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); break; } + case OpCode::Id::MEMBAR: { + UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL); + UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); + bb.push_back(Operation(OperationCode::MemoryBarrierGL)); + break; + } case OpCode::Id::DEPBAR: { - LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); + LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); break; } default: diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index e6c9d287e..8d54cce34 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -13,37 +13,65 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +namespace { +constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7; +} + u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr); + UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr); - const Node apply_mask = [&]() { + const Node apply_mask = [&] { switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: - return Immediate(static_cast<u32>(instr.r2p.immediate_mask)); + case OpCode::Id::P2R_IMM: + return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask)); default: UNREACHABLE(); - return Immediate(static_cast<u32>(instr.r2p.immediate_mask)); + return Immediate(0); } }(); - const Node mask = GetRegister(instr.gpr8); - const auto offset = static_cast<u32>(instr.r2p.byte) * 8; - constexpr u32 programmable_preds = 7; - for (u64 pred = 0; pred < programmable_preds; ++pred) { - const auto shift = static_cast<u32>(pred); + const auto offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; + + switch (opcode->get().GetId()) { + case OpCode::Id::R2P_IMM: { + const Node mask = GetRegister(instr.gpr8); - const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); - const Node condition = - Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); + for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { + const auto shift = static_cast<u32>(pred); - const Node value_compare = BitfieldExtract(mask, offset + shift, 1); - const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); + const Node 
apply_compare = BitfieldExtract(apply_mask, shift, 1); + const Node condition = + Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); - const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); - bb.push_back(Conditional(condition, {code})); + const Node value_compare = BitfieldExtract(mask, offset + shift, 1); + const Node value = + Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); + + const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); + bb.push_back(Conditional(condition, {code})); + } + break; + } + case OpCode::Id::P2R_IMM: { + Node value = Immediate(0); + for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { + Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred), + Immediate(0)); + value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit)); + } + value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask); + value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8); + + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); + break; } return pc; diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index ca690b58b..4b14cdf58 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -44,10 +44,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { bool is_bindless = false; switch (opcode->get().GetId()) { case OpCode::Id::TEX: { - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); - } - const TextureType texture_type{instr.tex.texture_type}; const bool is_array = instr.tex.array != 0; const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); @@ -62,10 +58,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); - } - const TextureType texture_type{instr.tex_b.texture_type}; const bool is_array = instr.tex_b.array != 0; const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); @@ -82,10 +74,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); const auto process_mode = instr.texs.GetTextureProcessMode(); - if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); - } - const Node4 components = GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); @@ -101,78 +89,142 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { [[fallthrough]]; } case OpCode::Id::TLD4: { - ASSERT(instr.tld4.array == 0); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), "NDV is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), - "PTP is not implemented"); - - if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); - } - const auto texture_type = instr.tld4.texture_type.Value(); const bool depth_compare = is_bindless ? 
instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) : instr.tld4.UsesMiscMode(TextureMiscMode::DC); const bool is_array = instr.tld4.array != 0; const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); - WriteTexInstructionFloat( - bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless)); + const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) + : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); + WriteTexInstructionFloat(bb, instr, + GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, + is_ptp, is_bindless)); break; } case OpCode::Id::TLD4S: { - UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); - } - - const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); + constexpr std::size_t num_coords = 2; + const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); + const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); const Node op_a = GetRegister(instr.gpr8); const Node op_b = GetRegister(instr.gpr20); // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. std::vector<Node> coords; - if (depth_compare) { + std::vector<Node> aoffi; + Node depth_compare; + if (is_depth_compare) { // Note: TLD4S coordinate encoding works just like TEXS's const Node op_y = GetRegister(instr.gpr8.Value() + 1); coords.push_back(op_a); coords.push_back(op_y); - coords.push_back(op_b); + if (is_aoffi) { + aoffi = GetAoffiCoordinates(op_b, num_coords, true); + depth_compare = GetRegister(instr.gpr20.Value() + 1); + } else { + depth_compare = op_b; + } } else { + // There's no depth compare coords.push_back(op_a); - coords.push_back(op_b); + if (is_aoffi) { + coords.push_back(GetRegister(instr.gpr8.Value() + 1)); + aoffi = GetAoffiCoordinates(op_b, num_coords, true); + } else { + coords.push_back(op_b); + } } const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); - const auto& sampler = - GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); + const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare}; + const Sampler& sampler = *GetSampler(instr.sampler, info); Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element}; + MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } - WriteTexsInstructionFloat(bb, instr, values, true); + if (instr.tld4s.fp16_flag) { + WriteTexsInstructionHalfFloat(bb, instr, values, true); + } else { + WriteTexsInstructionFloat(bb, instr, values, true); + } break; } - case OpCode::Id::TXQ_B: + case OpCode::Id::TXD_B: is_bindless = true; [[fallthrough]]; - case OpCode::Id::TXQ: { - if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); + case OpCode::Id::TXD: { + UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented"); + + u64 base_reg = instr.gpr8.Value(); + const auto derivate_reg = instr.gpr20.Value(); + const auto 
texture_type = instr.txd.texture_type.Value(); + const auto coord_count = GetCoordCount(texture_type); + + const Sampler* sampler = is_bindless + ? GetBindlessSampler(base_reg, {{texture_type, false, false}}) + : GetSampler(instr.sampler, {{texture_type, false, false}}); + Node4 values; + if (sampler == nullptr) { + for (u32 element = 0; element < values.size(); ++element) { + values[element] = Immediate(0); + } + WriteTexInstructionFloat(bb, instr, values); + break; + } + if (is_bindless) { + base_reg++; } + std::vector<Node> coords; + std::vector<Node> derivates; + for (std::size_t i = 0; i < coord_count; ++i) { + coords.push_back(GetRegister(base_reg + i)); + const std::size_t derivate = i * 2; + derivates.push_back(GetRegister(derivate_reg + derivate)); + derivates.push_back(GetRegister(derivate_reg + derivate + 1)); + } + + for (u32 element = 0; element < values.size(); ++element) { + MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element}; + values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); + } + + WriteTexInstructionFloat(bb, instr, values); + + break; + } + case OpCode::Id::TXQ_B: + is_bindless = true; + [[fallthrough]]; + case OpCode::Id::TXQ: { // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. - const auto& sampler = - is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); + const Sampler* sampler = + is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); + + if (sampler == nullptr) { + u32 indexer = 0; + for (u32 element = 0; element < 4; ++element) { + if (!instr.txq.IsComponentEnabled(element)) { + continue; + } + const Node value = Immediate(0); + SetTemporary(bb, indexer++, value); + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + break; + } u32 indexer = 0; switch (instr.txq.query_type) { @@ -181,7 +233,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { if (!instr.txq.IsComponentEnabled(element)) { continue; } - MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); @@ -205,15 +257,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), "NDV is not implemented"); - if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); - } - auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = - is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) - : GetSampler(instr.sampler, {{texture_type, is_array, false}}); + const Sampler* sampler = + is_bindless ? 
GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); + + if (sampler == nullptr) { + u32 indexer = 0; + for (u32 element = 0; element < 2; ++element) { + if (!instr.tmml.IsComponentEnabled(element)) { + continue; + } + const Node value = Immediate(0); + SetTemporary(bb, indexer++, value); + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + break; + } std::vector<Node> coords; @@ -240,7 +302,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { continue; } auto params = coords; - MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporary(bb, indexer++, value); } @@ -254,10 +316,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); - if (instr.tld.nodep_flag) { - LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete"); - } - WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); break; } @@ -269,10 +327,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { "AOFFI is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); - } - const Node4 components = GetTldsCode(instr, texture_type, is_array); if (instr.tlds.fp32_flag) { @@ -289,68 +343,54 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { return pc; } -const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, - std::optional<SamplerInfo> sampler_info) { - const auto offset = static_cast<u32>(sampler.index.Value()); - - TextureType type; - bool is_array; - bool is_shadow; +ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sampler_info, u32 offset, + std::optional<u32> buffer) { if (sampler_info) { - type = sampler_info->type; - is_array = sampler_info->is_array; - is_shadow = sampler_info->is_shadow; - } else if (const auto sampler = locker.ObtainBoundSampler(offset)) { - type = sampler->texture_type.Value(); - is_array = sampler->is_array.Value() != 0; - is_shadow = sampler->is_shadow.Value() != 0; - } else { + return *sampler_info; + } + const auto sampler = + buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); + if (!sampler) { LOG_WARNING(HW_GPU, "Unknown sampler info"); - type = TextureType::Texture2D; - is_array = false; - is_shadow = false; + return SamplerInfo{TextureType::Texture2D, false, false, false}; } + return SamplerInfo{sampler->texture_type, sampler->is_array != 0, sampler->is_shadow != 0, + sampler->is_buffer != 0}; +} + +const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, + std::optional<SamplerInfo> sampler_info) { + const auto offset = static_cast<u32>(sampler.index.Value()); + const auto info = GetSamplerInfo(sampler_info, offset); // If this sampler has already been used, return the existing mapping. 
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), [offset](const Sampler& entry) { return entry.GetOffset() == offset; }); if (it != used_samplers.end()) { - ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && - it->IsShadow() == is_shadow); - return *it; + ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && + it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer); + return &*it; } // Otherwise create a new mapping for this sampler const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow)); + return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, + info.is_buffer); } -const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, +const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, std::optional<SamplerInfo> sampler_info) { const Node sampler_register = GetRegister(reg); const auto [base_sampler, buffer, offset] = TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); ASSERT(base_sampler != nullptr); - - TextureType type; - bool is_array; - bool is_shadow; - if (sampler_info) { - type = sampler_info->type; - is_array = sampler_info->is_array; - is_shadow = sampler_info->is_shadow; - } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) { - type = sampler->texture_type.Value(); - is_array = sampler->is_array.Value() != 0; - is_shadow = sampler->is_shadow.Value() != 0; - } else { - LOG_WARNING(HW_GPU, "Unknown sampler info"); - type = TextureType::Texture2D; - is_array = false; - is_shadow = false; + if (base_sampler == nullptr) { + return nullptr; } + const auto info = GetSamplerInfo(sampler_info, offset, buffer); + // If this sampler has already been used, return the existing mapping. const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), @@ -358,15 +398,15 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, return entry.GetBuffer() == buffer && entry.GetOffset() == offset; }); if (it != used_samplers.end()) { - ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && - it->IsShadow() == is_shadow); - return *it; + ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && + it->IsShadow() == info.is_shadow); + return &*it; } // Otherwise create a new mapping for this sampler const auto next_index = static_cast<u32>(used_samplers.size()); - return used_samplers.emplace_back( - Sampler(next_index, offset, buffer, type, is_array, is_shadow)); + return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, + info.is_shadow, info.is_buffer); } void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { @@ -409,14 +449,14 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const } void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, - const Node4& components) { + const Node4& components, bool ignore_mask) { // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half // float instruction). 
Node4 values; u32 dest_elem = 0; for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component)) + if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) continue; values[dest_elem++] = components[component]; } @@ -451,17 +491,23 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) - : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); + const SamplerInfo info{texture_type, is_array, is_shadow, false}; + const Sampler* sampler = + is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); + Node4 values; + if (sampler == nullptr) { + for (u32 element = 0; element < values.size(); ++element) { + values[element] = Immediate(0); + } + return values; + } const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || process_mode == TextureProcessMode::LLA; - // LOD selection (either via bias or explicit textureLod) not - // supported in GL for sampler2DArrayShadow and - // samplerCubeArrayShadow. + // LOD selection (either via bias or explicit textureLod) not supported in GL for + // sampler2DArrayShadow and samplerCubeArrayShadow. const bool gl_lod_supported = !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); @@ -471,8 +517,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); - Node bias = {}; - Node lod = {}; + Node bias; + Node lod; if (process_mode != TextureProcessMode::None && gl_lod_supported) { switch (process_mode) { case TextureProcessMode::LZ: @@ -493,10 +539,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, } } - Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto copy_coords = coords; - MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element}; + MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; values[element] = Operation(read_method, meta, std::move(copy_coords)); } @@ -593,7 +638,9 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi, bool is_bindless) { + bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { + ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); + const std::size_t coord_count = GetCoordCount(texture_type); // If enabled arrays index is always stored in the gpr8 field @@ -608,17 +655,26 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de u64 parameter_register = instr.gpr20.Value(); - const auto& sampler = - is_bindless - ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}}) - : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); + const SamplerInfo info{texture_type, is_array, depth_compare, false}; + const Sampler* sampler = is_bindless ? 
GetBindlessSampler(parameter_register++, info) + : GetSampler(instr.sampler, info); + Node4 values; + if (sampler == nullptr) { + for (u32 element = 0; element < values.size(); ++element) { + values[element] = Immediate(0); + } + return values; + } - std::vector<Node> aoffi; + std::vector<Node> aoffi, ptp; if (is_aoffi) { aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); + } else if (is_ptp) { + ptp = GetPtpCoordinates( + {GetRegister(parameter_register++), GetRegister(parameter_register++)}); } - Node dc{}; + Node dc; if (depth_compare) { dc = GetRegister(parameter_register++); } @@ -626,11 +682,10 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) : Immediate(static_cast<u32>(instr.tld4.component)); - Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component, - element}; + MetaTexture meta{ + *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -658,12 +713,12 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); + const auto& sampler = *GetSampler(instr.sampler); Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } @@ -671,6 +726,8 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { } Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { + const Sampler& sampler = *GetSampler(instr.sampler); + const std::size_t type_coord_count = GetCoordCount(texture_type); const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; @@ -694,12 +751,24 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is // When lod is used always is in gpr20 const Node lod = lod_enabled ? 
GetRegister(instr.gpr20) : Immediate(0); - const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); + // Fill empty entries from the guest sampler + const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); + if (type_coord_count != entry_coord_count) { + LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); + + // When the size is higher we insert zeroes + for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { + coords.push_back(GetRegister(Register::ZeroIndex)); + } + + // Then we ensure the size matches the number of entries (dropping unused values) + coords.resize(entry_coord_count); + } Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; @@ -764,4 +833,38 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor return aoffi; } +std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) { + static constexpr u32 num_entries = 8; + + std::vector<Node> ptp; + ptp.reserve(num_entries); + + const auto global_size = static_cast<s64>(global_code.size()); + const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); + const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); + if (!low || !high) { + for (u32 entry = 0; entry < num_entries; ++entry) { + const u32 reg = entry / 4; + const u32 offset = entry % 4; + const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); + const Node condition = + Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); + const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); + ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); + } + return ptp; + } + + const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low); + for (u32 entry = 0; entry < num_entries; ++entry) { + s32 value = (immediate >> (entry * 8)) & 0b111111; + if (value >= 32) { + value -= 64; + } + ptp.push_back(Immediate(value)); + } + + return ptp; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index fa8a250cc..11b77f795 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation; using Tegra::Shader::VoteOperation; namespace { + OperationCode GetOperationCode(VoteOperation vote_op) { switch (vote_op) { case VoteOperation::All: @@ -30,12 +31,16 @@ OperationCode GetOperationCode(VoteOperation vote_op) { return OperationCode::VoteAll; } } + } // Anonymous namespace u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); + // Signal the backend that this shader uses warp instructions. + uses_warps = true; + switch (opcode->get().GetId()) { case OpCode::Id::VOTE: { const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); @@ -46,50 +51,59 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::SHFL: { - Node width = [this, instr] { - Node mask = instr.shfl.is_mask_imm ? 
Immediate(static_cast<u32>(instr.shfl.mask_imm)) - : GetRegister(instr.gpr39); - - // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has - // been done reversing Nvidia's math. It won't work on all cases due to SHFL having - // different parameters that don't properly map to GLSL's interface, but it should work - // for cases emitted by Nvidia's compiler. - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation( - OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), - Immediate(8)); - } else { - return Operation(OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, Immediate(0x201F), - Operation(OperationCode::INegate, std::move(mask))), - Immediate(8)); - } - }(); + Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) + : GetRegister(instr.gpr39); + Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) + : GetRegister(instr.gpr20); - const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { + Node thread_id = Operation(OperationCode::ThreadId); + Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); + Node seg_mask = BitfieldExtract(mask, 8, 16); + + Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); + Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); + Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, + Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); + + Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { switch (instr.shfl.operation) { case ShuffleOperation::Idx: - return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; - case ShuffleOperation::Up: - return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; + return Operation(OperationCode::IBitwiseOr, + Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), + min_thread_id); case ShuffleOperation::Down: - return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; + return Operation(OperationCode::IAdd, thread_id, index); + case ShuffleOperation::Up: + return Operation(OperationCode::IAdd, thread_id, + Operation(OperationCode::INegate, index)); case ShuffleOperation::Bfly: - return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; + return Operation(OperationCode::IBitwiseXor, thread_id, index); } - UNREACHABLE_MSG("Invalid SHFL operation: {}", - static_cast<u64>(instr.shfl.operation.Value())); - return {}; + UNREACHABLE(); + return Immediate(0U); }(); - // Setting the predicate before the register is intentional to avoid overwriting. - Node index = instr.shfl.is_index_imm ? 
Immediate(static_cast<u32>(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); + Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { + if (instr.shfl.operation == ShuffleOperation::Up) { + return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); + } else { + return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); + } + }(); + + SetPredicate(bb, instr.shfl.pred48, in_bounds); SetRegister( bb, instr.gpr0, - Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); + Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); + break; + } + case OpCode::Id::FSWZADD: { + UNIMPLEMENTED_IF(instr.fswzadd.ndv); + + Node op_a = GetRegister(instr.gpr8); + Node op_b = GetRegister(instr.gpr20); + Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); + SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); break; } default: diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4300d9ff4..4e155542a 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -47,6 +47,7 @@ enum class OperationCode { FTrunc, /// (MetaArithmetic, float a) -> float FCastInteger, /// (MetaArithmetic, int a) -> float FCastUInteger, /// (MetaArithmetic, uint a) -> float + FSwizzleAdd, /// (float a, float b, uint mask) -> float IAdd, /// (MetaArithmetic, int a, int b) -> int IMul, /// (MetaArithmetic, int a, int b) -> int @@ -67,6 +68,7 @@ enum class OperationCode { IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int IBitCount, /// (MetaArithmetic, int) -> int + IBitMSB, /// (MetaArithmetic, int) -> int UAdd, /// (MetaArithmetic, uint a, uint b) -> uint UMul, /// (MetaArithmetic, uint a, uint b) -> uint @@ -85,6 +87,7 @@ enum class OperationCode { UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint UBitCount, /// (MetaArithmetic, uint) -> uint + UBitMSB, /// (MetaArithmetic, uint) -> uint HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 @@ -148,6 +151,7 @@ enum class OperationCode { TextureQueryDimensions, /// (MetaTexture, float a) -> float4 TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 TexelFetch, /// (MetaTexture, int[N], int) -> float4 + TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4 ImageLoad, /// (MetaImage, int[N] coords) -> void ImageStore, /// (MetaImage, int[N] coords) -> void @@ -168,6 +172,7 @@ enum class OperationCode { EmitVertex, /// () -> void EndPrimitive, /// () -> void + InvocationId, /// () -> int YNegate, /// () -> float LocalInvocationIdX, /// () -> uint LocalInvocationIdY, /// () -> uint @@ -181,15 +186,10 @@ enum class OperationCode { VoteAny, /// (bool) -> bool VoteEqual, /// (bool) -> bool - ShuffleIndexed, /// (uint value, uint index, uint width) -> uint - ShuffleUp, /// (uint value, uint index, uint width) -> uint - ShuffleDown, /// (uint value, uint index, uint width) -> uint - ShuffleButterfly, /// (uint value, uint index, uint width) -> uint + ThreadId, /// () -> uint + ShuffleIndexed, /// (uint value, uint index) -> uint - InRangeShuffleIndexed, /// (uint 
@@ -216,13 +216,14 @@
 class PredicateNode;
 class AbufNode;
 class CbufNode;
 class LmemNode;
+class PatchNode;
 class SmemNode;
 class GmemNode;
 class CommentNode;
 
-using NodeData =
-    std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
-                 PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>;
+using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
+                              InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
+                              LmemNode, SmemNode, GmemNode, CommentNode>;
 using Node = std::shared_ptr<NodeData>;
 using Node4 = std::array<Node, 4>;
 using NodeBlock = std::vector<Node>;
@@ -231,14 +232,15 @@ class Sampler {
 public:
     /// This constructor is for bound samplers
     constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
-                               bool is_array, bool is_shadow)
-        : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
+                               bool is_array, bool is_shadow, bool is_buffer)
+        : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
+          is_buffer{is_buffer} {}
 
     /// This constructor is for bindless samplers
     constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
-                               bool is_array, bool is_shadow)
+                               bool is_array, bool is_shadow, bool is_buffer)
         : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
-          is_shadow{is_shadow}, is_bindless{true} {}
+          is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {}
 
     constexpr u32 GetIndex() const {
         return index;
@@ -264,6 +266,10 @@ public:
         return is_shadow;
     }
 
+    constexpr bool IsBuffer() const {
+        return is_buffer;
+    }
+
     constexpr bool IsBindless() const {
         return is_bindless;
     }
@@ -276,6 +282,7 @@ private:
     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
     bool is_array{};    ///< Whether the texture is being sampled as an array texture or not.
     bool is_shadow{};   ///< Whether the texture is being sampled as a depth texture or not.
+    bool is_buffer{};   ///< Whether the texture is a texture buffer without sampler.
     bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
 };
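Both `Sampler` constructors grow a trailing `is_buffer` argument, so every call site must now state whether the entry is a texture buffer accessed without a sampler object. Illustrative construction of each flavor, compiled against the node.h declarations above (all values invented):

```cpp
// Bound sampler flavor: here flagged as a texture buffer.
constexpr Sampler bound_tbo{/*index=*/0, /*offset=*/8,
                            Tegra::Shader::TextureType::Texture1D,
                            /*is_array=*/false, /*is_shadow=*/false,
                            /*is_buffer=*/true};

// Bindless flavor: the extra 'buffer' argument selects the const buffer
// that holds the texture handle.
constexpr Sampler bindless{/*index=*/1, /*offset=*/16, /*buffer=*/3,
                           Tegra::Shader::TextureType::Texture2D,
                           /*is_array=*/false, /*is_shadow=*/false,
                           /*is_buffer=*/false};
```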
@@ -367,6 +374,8 @@ struct MetaTexture {
     Node array;
     Node depth_compare;
     std::vector<Node> aoffi;
+    std::vector<Node> ptp;
+    std::vector<Node> derivates;
     Node bias;
     Node lod;
     Node component{};
@@ -383,8 +392,30 @@ struct MetaImage {
 using Meta =
     std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
 
+class AmendNode {
+public:
+    std::optional<std::size_t> GetAmendIndex() const {
+        if (amend_index == amend_null_index) {
+            return std::nullopt;
+        }
+        return {amend_index};
+    }
+
+    void SetAmendIndex(std::size_t index) {
+        amend_index = index;
+    }
+
+    void ClearAmend() {
+        amend_index = amend_null_index;
+    }
+
+private:
+    static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL;
+    std::size_t amend_index{amend_null_index};
+};
+
 /// Holds any kind of operation that can be done in the IR
-class OperationNode final {
+class OperationNode final : public AmendNode {
 public:
     explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
@@ -424,7 +455,7 @@ private:
 };
 
 /// Encloses inside any kind of node that returns a boolean conditionally-executed code
-class ConditionalNode final {
+class ConditionalNode final : public AmendNode {
 public:
     explicit ConditionalNode(Node condition, std::vector<Node>&& code)
         : condition{std::move(condition)}, code{std::move(code)} {}
@@ -538,6 +569,19 @@ private:
     u32 element{};
 };
 
+/// Patch memory (used to communicate tessellation stages).
+class PatchNode final {
+public:
+    explicit PatchNode(u32 offset) : offset{offset} {}
+
+    u32 GetOffset() const {
+        return offset;
+    }
+
+private:
+    u32 offset{};
+};
+
 /// Constant buffer node, usually mapped to uniform buffers in GLSL
 class CbufNode final {
 public:
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 1d9825c76..31eecb3f4 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -446,4 +446,10 @@ Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
                      Immediate(bits));
 }
 
+std::size_t ShaderIR::DeclareAmend(Node new_amend) {
+    const std::size_t id = amend_code.size();
+    amend_code.push_back(new_amend);
+    return id;
+}
+
 } // namespace VideoCommon::Shader
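`DeclareAmend` and the `AmendNode` base in node.h form a small side-table: the IR owns the amending nodes, while an operation or conditional node stores only an index into that table (the sentinel `amend_null_index` means "no amend"). A self-contained toy model of the flow, with `std::string` standing in for `Node` purely for illustration:

```cpp
#include <cstddef>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Toy stand-in for ShaderIR's amend side-table.
class ToyIR {
public:
    std::size_t DeclareAmend(std::string new_amend) {
        const std::size_t id = amend_code.size();
        amend_code.push_back(std::move(new_amend));
        return id;
    }
    const std::string& GetAmendNode(std::size_t index) const {
        return amend_code[index];
    }

private:
    std::vector<std::string> amend_code;
};

// Toy stand-in for a node deriving from AmendNode.
struct ToyOperation {
    std::optional<std::size_t> amend_index;
};

int main() {
    ToyIR ir;
    ToyOperation op;
    op.amend_index = ir.DeclareAmend("code to emit before the operation");

    // A backend visiting 'op' resolves and emits the amend first, as a
    // decompiler would via GetAmendIndex()/GetAmendNode().
    if (op.amend_index) {
        std::cout << ir.GetAmendNode(*op.amend_index) << '\n';
    }
}
```

This keeps `OperationNode` and `ConditionalNode` cheap to copy while still letting the frontend attach extra code to individual nodes after the fact.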
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 26c8fde22..aacd0a0da 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -49,7 +49,7 @@ public:
     }
 
     u32 GetSize() const {
-        return max_offset + sizeof(float);
+        return max_offset + static_cast<u32>(sizeof(float));
     }
 
     u32 GetMaxOffset() const {
@@ -137,6 +137,10 @@ public:
         return uses_vertex_id;
     }
 
+    bool UsesWarps() const {
+        return uses_warps;
+    }
+
     bool HasPhysicalAttributes() const {
         return uses_physical_attributes;
     }
@@ -165,13 +169,17 @@ public:
         return program_manager.GetVariables();
     }
 
-    u32 ConvertAddressToNvidiaSpace(const u32 address) const {
-        return (address - main_offset) * sizeof(Tegra::Shader::Instruction);
+    u32 ConvertAddressToNvidiaSpace(u32 address) const {
+        return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
     }
 
     /// Returns a condition code evaluated from internal flags
     Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
 
+    const Node& GetAmendNode(std::size_t index) const {
+        return amend_code[index];
+    }
+
 private:
     friend class ASTDecoder;
 
@@ -179,6 +187,7 @@ private:
         Tegra::Shader::TextureType type;
         bool is_array;
         bool is_shadow;
+        bool is_buffer;
     };
 
     void Decode();
@@ -303,13 +312,17 @@ private:
     /// Returns a predicate combiner operation
    OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
 
+    /// Queries the missing sampler info from the execution context.
+    SamplerInfo GetSamplerInfo(std::optional<SamplerInfo> sampler_info, u32 offset,
+                               std::optional<u32> buffer = std::nullopt);
+
     /// Accesses a texture sampler
-    const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
-                              std::optional<SamplerInfo> sampler_info);
+    const Sampler* GetSampler(const Tegra::Shader::Sampler& sampler,
+                              std::optional<SamplerInfo> sampler_info = std::nullopt);
 
-    // Accesses a texture sampler for a bindless texture.
-    const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
-                                      std::optional<SamplerInfo> sampler_info);
+    /// Accesses a texture sampler for a bindless texture.
+    const Sampler* GetBindlessSampler(Tegra::Shader::Register reg,
+                                      std::optional<SamplerInfo> sampler_info = std::nullopt);
 
     /// Accesses an image.
     Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
@@ -329,7 +342,7 @@ private:
     void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                    const Node4& components, bool ignore_mask = false);
     void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
-                                       const Node4& components);
+                                       const Node4& components, bool ignore_mask = false);
 
     Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
@@ -341,7 +354,8 @@ private:
                       bool is_array);
 
     Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
-                      bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless);
+                      bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
+                      bool is_bindless);
 
     Node4 GetTldCode(Tegra::Shader::Instruction instr);
 
@@ -354,6 +368,8 @@ private:
     std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
 
+    std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);
+
     Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                          Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
                          Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
@@ -380,6 +396,9 @@ private:
                               Tegra::Shader::Instruction instr, bool is_write);
 
+    /// Register new amending code and obtain the reference id.
+    std::size_t DeclareAmend(Node new_amend);
+
     const ProgramCode& program_code;
     const u32 main_offset;
     const CompilerSettings settings;
@@ -394,6 +413,7 @@ private:
     std::map<u32, NodeBlock> basic_blocks;
     NodeBlock global_code;
     ASTManager program_manager{true, true};
+    std::vector<Node> amend_code;
 
     std::set<u32> used_registers;
     std::set<Tegra::Shader::Pred> used_predicates;
@@ -410,6 +430,7 @@ private:
     bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
     bool uses_instance_id{};
     bool uses_vertex_id{};
+    bool uses_warps{};
 
     Tegra::Shader::Header header;
 };
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 55f5949e4..165c79330 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -7,6 +7,7 @@
 #include <variant>
 
 #include "common/common_types.h"
+#include "video_core/shader/node.h"
 #include "video_core/shader/shader_ir.h"
 
 namespace VideoCommon::Shader {
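On the two `static_cast<u32>(sizeof(...))` changes in shader_ir.h above: `sizeof` yields `std::size_t`, so the old expressions widened to 64 bits and were implicitly narrowed back to `u32` on return, the kind of conversion that trips -Wconversion-style warnings and MSVC's C4267. A minimal before/after illustration, assuming that is the motivation (function names invented):

```cpp
#include <cstdint>
using u32 = std::uint32_t;

u32 GetSizeOld(u32 max_offset) {
    return max_offset + sizeof(float); // size_t result, narrowed on return
}

u32 GetSizeNew(u32 max_offset) {
    return max_offset + static_cast<u32>(sizeof(float)); // no implicit narrowing
}
```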
