diff options
Diffstat (limited to 'src/video_core/shader')
30 files changed, 712 insertions, 672 deletions
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 812983a99..e4c438792 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -121,15 +121,15 @@ ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { return exit_method = ExitMethod::AlwaysReturn; } -BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) { - BasicBlock basic_block; +NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { + NodeBlock basic_block; for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { pc = DecodeInstr(basic_block, pc); } return basic_block; } -u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { +u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { // Ignore sched instructions when generating code. if (IsSchedInstruction(pc, main_offset)) { return pc + 1; @@ -151,39 +151,39 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, "NeverExecute predicate not implemented"); - static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)> - decoders = { - {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, - {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, - {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, - {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, - {OpCode::Type::Shift, &ShaderIR::DecodeShift}, - {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, - {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, - {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, - {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, - {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, - {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, - {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, - {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, - {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, - {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, - {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, - {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, - {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, - {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, - {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, - {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, - {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, - {OpCode::Type::Video, &ShaderIR::DecodeVideo}, - {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, - }; + static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = { + {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, + {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, + {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, + {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, + {OpCode::Type::Shift, &ShaderIR::DecodeShift}, + {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, + {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, + {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, + {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, + {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, + {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, + {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, + {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, + {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, + {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, + {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, + {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, + {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, + {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, + {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, + {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, + {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, + {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, + {OpCode::Type::Video, &ShaderIR::DecodeVideo}, + {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, + }; std::vector<Node> tmp_block; if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { - pc = (this->*decoder->second)(tmp_block, bb, pc); + pc = (this->*decoder->second)(tmp_block, pc); } else { - pc = DecodeOther(tmp_block, bb, pc); + pc = DecodeOther(tmp_block, pc); } // Some instructions (like SSY) don't have a predicate field, they are always unconditionally @@ -192,11 +192,14 @@ u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { const auto pred_index = static_cast<u32>(instr.pred.pred_index); if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { - bb.push_back( - Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block))); + const Node conditional = + Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); + global_code.push_back(conditional); + bb.push_back(conditional); } else { for (auto& node : tmp_block) { - bb.push_back(std::move(node)); + global_code.push_back(node); + bb.push_back(node); } } diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 51b8d55d4..3190e2d7c 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::SubOp; -u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 37eef2bf2..baee89107 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 7b4f7d284..c2164ba50 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 4fd3db54e..0d139c0d2 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index cc9a76a19..9fd4b273e 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -15,7 +15,7 @@ using Tegra::Shader::OpCode; using Tegra::Shader::Pred; using Tegra::Shader::Register; -u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -41,7 +41,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3 const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); + SetInternalFlagsFromInteger(bb, value, instr.generates_cc); SetRegister(bb, instr.gpr0, value); break; } @@ -242,7 +242,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3 return pc; } -void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, +void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, Node imm_lut, bool sets_cc) { constexpr u32 lop_iterations = 32; const Node one = Immediate(1); @@ -284,4 +284,4 @@ void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, No SetRegister(bb, dest, value); } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index b26a6e473..3ed5ccc5a 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -16,7 +16,7 @@ using Tegra::Shader::Pred; using Tegra::Shader::PredicateResultMode; using Tegra::Shader::Register; -u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -54,9 +54,9 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& return pc; } -void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op, - Node op_a, Node op_b, PredicateResultMode predicate_mode, - Pred predicate, bool sets_cc) { +void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, + Node op_b, PredicateResultMode predicate_mode, Pred predicate, + bool sets_cc) { const Node result = [&]() { switch (logic_op) { case LogicOperation::And: diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index 0734141b0..6a95dc928 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 942d6729d..601d66f1f 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 728a393a1..55a6fbbf2 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; -u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -118,8 +118,8 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { value = [&]() { switch (instr.conversion.f2i.rounding) { - case Tegra::Shader::F2iRoundingOp::None: - return value; + case Tegra::Shader::F2iRoundingOp::RoundEven: + return Operation(OperationCode::FRoundEven, PRECISE, value); case Tegra::Shader::F2iRoundingOp::Floor: return Operation(OperationCode::FFloor, PRECISE, value); case Tegra::Shader::F2iRoundingOp::Ceil: @@ -146,4 +146,4 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index 52f39d3ff..0559cc8de 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index 9f9da2278..1bd6755dd 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index dd3aef6f2..9285b8d05 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index dfd7cb98f..748368555 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -14,7 +14,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index 53c44ae5a..e68512692 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 43a0a9e10..7a07c5ec6 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -16,7 +16,7 @@ using Tegra::Shader::HalfType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index 16eb3985f..a3bf17eba 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index daf97174b..aad836d24 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 3dd26da20..ea3c71eed 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -17,26 +17,8 @@ using Tegra::Shader::Attribute; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; -using Tegra::Shader::TextureMiscMode; -using Tegra::Shader::TextureProcessMode; -using Tegra::Shader::TextureType; - -static std::size_t GetCoordCount(TextureType texture_type) { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::Texture3D: - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); - return 0; - } -} -u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -48,7 +30,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, "Unaligned attribute loads are not supported"); - Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, + Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass, Tegra::Shader::IpaSampleMode::Default}; u64 next_element = instr.attribute.fmt20.element; @@ -160,7 +142,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { }(); const Node addr_register = GetRegister(instr.gpr8); - const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size())); + const Node base_address = + TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); const auto cbuf = std::get_if<CbufNode>(base_address); ASSERT(cbuf != nullptr); const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); @@ -246,197 +229,6 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { } break; } - case OpCode::Id::TEX: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); - } - - const TextureType texture_type{instr.tex.texture_type}; - const bool is_array = instr.tex.array != 0; - const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex.GetTextureProcessMode(); - WriteTexInstructionFloat( - bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); - break; - } - case OpCode::Id::TEXS: { - const TextureType texture_type{instr.texs.GetTextureType()}; - const bool is_array{instr.texs.IsArrayTexture()}; - const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.texs.GetTextureProcessMode(); - - if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); - } - - const Node4 components = - GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); - - if (instr.texs.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - case OpCode::Id::TLD4: { - ASSERT(instr.tld4.array == 0); - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), - "NDV is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), - "PTP is not implemented"); - - if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); - } - - const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); - const bool is_array = instr.tld4.array != 0; - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array)); - break; - } - case OpCode::Id::TLD4S: { - UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); - } - - const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = GetRegister(instr.gpr20); - - std::vector<Node> coords; - - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. - if (depth_compare) { - // Note: TLD4S coordinate encoding works just like TEXS's - const Node op_y = GetRegister(instr.gpr8.Value() + 1); - coords.push_back(op_a); - coords.push_back(op_y); - coords.push_back(op_b); - } else { - coords.push_back(op_a); - coords.push_back(op_b); - } - const auto num_coords = static_cast<u32>(coords.size()); - coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); - - const auto& sampler = - GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, num_coords}; - values[element] = - Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); - } - - WriteTexsInstructionFloat(bb, instr, values); - break; - } - case OpCode::Id::TXQ: { - if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); - } - - // TODO: The new commits on the texture refactor, change the way samplers work. - // Sadly, not all texture instructions specify the type of texture their sampler - // uses. This must be fixed at a later instance. - const auto& sampler = - GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); - - u32 indexer = 0; - switch (instr.txq.query_type) { - case Tegra::Shader::TextureQueryType::Dimension: { - for (u32 element = 0; element < 4; ++element) { - if (instr.txq.IsComponentEnabled(element)) { - MetaTexture meta{sampler, element}; - const Node value = Operation(OperationCode::F4TextureQueryDimensions, - std::move(meta), GetRegister(instr.gpr8)); - SetTemporal(bb, indexer++, value); - } - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled texture query type: {}", - static_cast<u32>(instr.txq.query_type.Value())); - } - break; - } - case OpCode::Id::TMML: { - UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), - "NDV is not implemented"); - - if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); - } - - auto texture_type = instr.tmml.texture_type.Value(); - const bool is_array = instr.tmml.array != 0; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); - - std::vector<Node> coords; - - // TODO: Add coordinates for different samplers once other texture types are implemented. - switch (texture_type) { - case TextureType::Texture1D: - coords.push_back(GetRegister(instr.gpr8)); - break; - case TextureType::Texture2D: - coords.push_back(GetRegister(instr.gpr8.Value() + 0)); - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - break; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); - - // Fallback to interpreting as a 2D texture for now - coords.push_back(GetRegister(instr.gpr8.Value() + 0)); - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - texture_type = TextureType::Texture2D; - } - - for (u32 element = 0; element < 2; ++element) { - auto params = coords; - MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())}; - const Node value = - Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); - SetTemporal(bb, element, value); - } - for (u32 element = 0; element < 2; ++element) { - SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); - } - - break; - } - case OpCode::Id::TLDS: { - const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; - const bool is_array{instr.tlds.IsArrayTexture()}; - - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - - if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); - } - - WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); - break; - } default: UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); } @@ -444,328 +236,4 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { return pc; } -const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, - bool is_array, bool is_shadow) { - const auto offset = static_cast<std::size_t>(sampler.index.Value()); - - // If this sampler has already been used, return the existing mapping. - const auto itr = - std::find_if(used_samplers.begin(), used_samplers.end(), - [&](const Sampler& entry) { return entry.GetOffset() == offset; }); - if (itr != used_samplers.end()) { - ASSERT(itr->GetType() == type && itr->IsArray() == is_array && - itr->IsShadow() == is_shadow); - return *itr; - } - - // Otherwise create a new mapping for this sampler - const std::size_t next_index = used_samplers.size(); - const Sampler entry{offset, next_index, type, is_array, is_shadow}; - return *used_samplers.emplace(entry).first; -} - -void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr, - const Node4& components) { - u32 dest_elem = 0; - for (u32 elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - SetTemporal(bb, dest_elem++, components[elem]); - } - // After writing values in temporals, move them to the real registers - for (u32 i = 0; i < dest_elem; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); - } -} - -void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, - const Node4& components) { - // TEXS has two destination registers and a swizzle. The first two elements in the swizzle - // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 - - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component)) - continue; - SetTemporal(bb, dest_elem++, components[component]); - } - - for (u32 i = 0; i < dest_elem; ++i) { - if (i < 2) { - // Write the first two swizzle components to gpr0 and gpr0+1 - SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); - } else { - ASSERT(instr.texs.HasTwoDestinations()); - // Write the rest of the swizzle components to gpr28 and gpr28+1 - SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); - } - } -} - -void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, - const Node4& components) { - // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half - // float instruction). - - Node4 values; - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component)) - continue; - values[dest_elem++] = components[component]; - } - if (dest_elem == 0) - return; - - std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); - - const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); - if (dest_elem <= 2) { - SetRegister(bb, instr.gpr0, first_value); - return; - } - - SetTemporal(bb, 0, first_value); - SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); - - SetRegister(bb, instr.gpr0, GetTemporal(0)); - SetRegister(bb, instr.gpr28, GetTemporal(1)); -} - -Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array, - std::size_t array_offset, std::size_t bias_offset, - std::vector<Node>&& coords) { - UNIMPLEMENTED_IF_MSG( - (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || - (texture_type == TextureType::TextureCube && is_array && depth_compare), - "This method is not supported."); - - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); - - const bool lod_needed = process_mode == TextureProcessMode::LZ || - process_mode == TextureProcessMode::LL || - process_mode == TextureProcessMode::LLA; - - // LOD selection (either via bias or explicit textureLod) not supported in GL for - // sampler2DArrayShadow and samplerCubeArrayShadow. - const bool gl_lod_supported = - !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || - (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare)); - - const OperationCode read_method = - lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture; - - UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); - - std::optional<u32> array_offset_value; - if (is_array) - array_offset_value = static_cast<u32>(array_offset); - - const auto coords_count = static_cast<u32>(coords.size()); - - if (process_mode != TextureProcessMode::None && gl_lod_supported) { - if (process_mode == TextureProcessMode::LZ) { - coords.push_back(Immediate(0.0f)); - } else { - // If present, lod or bias are always stored in the register indexed by the gpr20 - // field with an offset depending on the usage of the other registers - coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); - } - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, coords_count, array_offset_value}; - values[element] = Operation(read_method, std::move(meta), std::move(params)); - } - - return values; -} - -Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - - const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( - texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - // 1D.DC in opengl the 2nd component is ignored. - if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { - coords.push_back(Immediate(0.0f)); - } - std::size_t array_offset{}; - if (is_array) { - array_offset = coords.size(); - coords.push_back(GetRegister(array_register)); - } - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 - // or in the next register if lod or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - coords.push_back(GetRegister(depth_register)); - } - // Fill ignored coordinates - while (coords.size() < total_coord_count) { - coords.push_back(Immediate(0)); - } - - return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, - 0, std::move(coords)); -} - -Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - - const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( - texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - const u64 last_coord_register = - (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) - ? static_cast<u64>(instr.gpr20.Value()) - : coord_register + 1; - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - const bool last = (i == (coord_count - 1)) && (coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); - } - - std::size_t array_offset{}; - if (is_array) { - array_offset = coords.size(); - coords.push_back(GetRegister(array_register)); - } - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 - // or in the next register if lod or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - coords.push_back(GetRegister(depth_register)); - } - // Fill ignored coordinates - while (coords.size() < total_coord_count) { - coords.push_back(Immediate(0)); - } - - return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, - (coord_count > 2 ? 1 : 0), std::move(coords)); -} - -Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array) { - const std::size_t coord_count = GetCoordCount(texture_type); - const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector<Node> coords; - - for (size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - std::optional<u32> array_offset; - if (is_array) { - array_offset = static_cast<u32>(coords.size()); - coords.push_back(GetRegister(array_register)); - } - - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset}; - values[element] = - Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); - } - - return values; -} - -Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { - const std::size_t type_coord_count = GetCoordCount(texture_type); - const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); - const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // if is array gpr20 is used - const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); - - const u64 last_coord_register = - ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array - ? static_cast<u64>(instr.gpr20.Value()) - : coord_register + 1; - - std::vector<Node> coords; - - for (std::size_t i = 0; i < type_coord_count; ++i) { - const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); - } - std::optional<u32> array_offset; - if (is_array) { - array_offset = static_cast<u32>(coords.size()); - coords.push_back(GetRegister(array_register)); - } - const auto coords_count = static_cast<u32>(coords.size()); - - if (lod_enabled) { - // When lod is used always is in grp20 - coords.push_back(GetRegister(instr.gpr20)); - } else { - coords.push_back(Immediate(0)); - } - - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto params = coords; - MetaTexture meta{sampler, element, coords_count, array_offset}; - values[element] = - Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); - } - return values; -} - -std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( - TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, - std::size_t max_coords, std::size_t max_inputs) { - const std::size_t coord_count = GetCoordCount(texture_type); - - std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); - if (total_coord_count > max_coords || total_reg_count > max_inputs) { - UNIMPLEMENTED_MSG("Unsupported Texture operation"); - total_coord_count = std::min(total_coord_count, max_coords); - } - // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. - total_coord_count += - (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; - - return {coord_count, total_coord_count}; -} - } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index c1e5f4efb..d750a2936 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -14,7 +14,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; -u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) { instr.ipa.sample_mode.Value()}; const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); - const Node value = GetSaturatedFloat(attr, instr.ipa.saturate); + Node value = attr; + const Tegra::Shader::Attribute::Index index = attribute.index.Value(); + if (index >= Tegra::Shader::Attribute::Index::Attribute_0 && + index <= Tegra::Shader::Attribute::Index::Attribute_31) { + // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. + // In theory by setting them as perspective, OpenGL does the perspective correction. + // A way must figured to reverse the last step of it. + if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) { + value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20)); + } + } + value = GetSaturatedFloat(value, instr.ipa.saturate); SetRegister(bb, instr.gpr0, value); break; @@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp index 1717f0653..83c61680e 100644 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -13,7 +13,7 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Pred; -u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index 8bd15fb00..d0495995d 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index bdb4424a6..f070e8912 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 6623f8ff9..951e85f44 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp new file mode 100644 index 000000000..a99ae19bf --- /dev/null +++ b/src/video_core/shader/decode/texture.cpp @@ -0,0 +1,534 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <vector> +#include <fmt/format.h> + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; +using Tegra::Shader::Register; +using Tegra::Shader::TextureMiscMode; +using Tegra::Shader::TextureProcessMode; +using Tegra::Shader::TextureType; + +static std::size_t GetCoordCount(TextureType texture_type) { + switch (texture_type) { + case TextureType::Texture1D: + return 1; + case TextureType::Texture2D: + return 2; + case TextureType::Texture3D: + case TextureType::TextureCube: + return 3; + default: + UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); + return 0; + } +} + +u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + switch (opcode->get().GetId()) { + case OpCode::Id::TEX: { + UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + + if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); + } + + const TextureType texture_type{instr.tex.texture_type}; + const bool is_array = instr.tex.array != 0; + const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.tex.GetTextureProcessMode(); + WriteTexInstructionFloat( + bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); + break; + } + case OpCode::Id::TEXS: { + const TextureType texture_type{instr.texs.GetTextureType()}; + const bool is_array{instr.texs.IsArrayTexture()}; + const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.texs.GetTextureProcessMode(); + + if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); + } + + const Node4 components = + GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); + + if (instr.texs.fp32_flag) { + WriteTexsInstructionFloat(bb, instr, components); + } else { + WriteTexsInstructionHalfFloat(bb, instr, components); + } + break; + } + case OpCode::Id::TLD4: { + ASSERT(instr.tld4.array == 0); + UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), + "NDV is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), + "PTP is not implemented"); + + if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); + } + + const auto texture_type = instr.tld4.texture_type.Value(); + const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); + const bool is_array = instr.tld4.array != 0; + WriteTexInstructionFloat(bb, instr, + GetTld4Code(instr, texture_type, depth_compare, is_array)); + break; + } + case OpCode::Id::TLD4S: { + UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); + } + + const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); + const Node op_a = GetRegister(instr.gpr8); + const Node op_b = GetRegister(instr.gpr20); + + // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. + std::vector<Node> coords; + if (depth_compare) { + // Note: TLD4S coordinate encoding works just like TEXS's + const Node op_y = GetRegister(instr.gpr8.Value() + 1); + coords.push_back(op_a); + coords.push_back(op_y); + coords.push_back(op_b); + } else { + coords.push_back(op_a); + coords.push_back(op_b); + } + const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); + + const auto& sampler = + GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); + + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto coords_copy = coords; + MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; + values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); + } + + WriteTexsInstructionFloat(bb, instr, values); + break; + } + case OpCode::Id::TXQ: { + if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); + } + + // TODO: The new commits on the texture refactor, change the way samplers work. + // Sadly, not all texture instructions specify the type of texture their sampler + // uses. This must be fixed at a later instance. + const auto& sampler = + GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + + u32 indexer = 0; + switch (instr.txq.query_type) { + case Tegra::Shader::TextureQueryType::Dimension: { + for (u32 element = 0; element < 4; ++element) { + if (!instr.txq.IsComponentEnabled(element)) { + continue; + } + MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; + const Node value = + Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); + SetTemporal(bb, indexer++, value); + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled texture query type: {}", + static_cast<u32>(instr.txq.query_type.Value())); + } + break; + } + case OpCode::Id::TMML: { + UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), + "NDV is not implemented"); + + if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); + } + + auto texture_type = instr.tmml.texture_type.Value(); + const bool is_array = instr.tmml.array != 0; + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + + std::vector<Node> coords; + + // TODO: Add coordinates for different samplers once other texture types are implemented. + switch (texture_type) { + case TextureType::Texture1D: + coords.push_back(GetRegister(instr.gpr8)); + break; + case TextureType::Texture2D: + coords.push_back(GetRegister(instr.gpr8.Value() + 0)); + coords.push_back(GetRegister(instr.gpr8.Value() + 1)); + break; + default: + UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); + + // Fallback to interpreting as a 2D texture for now + coords.push_back(GetRegister(instr.gpr8.Value() + 0)); + coords.push_back(GetRegister(instr.gpr8.Value() + 1)); + texture_type = TextureType::Texture2D; + } + + for (u32 element = 0; element < 2; ++element) { + auto params = coords; + MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; + const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); + SetTemporal(bb, element, value); + } + for (u32 element = 0; element < 2; ++element) { + SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); + } + + break; + } + case OpCode::Id::TLDS: { + const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; + const bool is_array{instr.tlds.IsArrayTexture()}; + + UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); + + if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); + } + + WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); + } + + return pc; +} + +const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, + bool is_array, bool is_shadow) { + const auto offset = static_cast<std::size_t>(sampler.index.Value()); + + // If this sampler has already been used, return the existing mapping. + const auto itr = + std::find_if(used_samplers.begin(), used_samplers.end(), + [&](const Sampler& entry) { return entry.GetOffset() == offset; }); + if (itr != used_samplers.end()) { + ASSERT(itr->GetType() == type && itr->IsArray() == is_array && + itr->IsShadow() == is_shadow); + return *itr; + } + + // Otherwise create a new mapping for this sampler + const std::size_t next_index = used_samplers.size(); + const Sampler entry{offset, next_index, type, is_array, is_shadow}; + return *used_samplers.emplace(entry).first; +} + +void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { + u32 dest_elem = 0; + for (u32 elem = 0; elem < 4; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + SetTemporal(bb, dest_elem++, components[elem]); + } + // After writing values in temporals, move them to the real registers + for (u32 i = 0; i < dest_elem; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); + } +} + +void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, + const Node4& components) { + // TEXS has two destination registers and a swizzle. The first two elements in the swizzle + // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 + + u32 dest_elem = 0; + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) + continue; + SetTemporal(bb, dest_elem++, components[component]); + } + + for (u32 i = 0; i < dest_elem; ++i) { + if (i < 2) { + // Write the first two swizzle components to gpr0 and gpr0+1 + SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); + } else { + ASSERT(instr.texs.HasTwoDestinations()); + // Write the rest of the swizzle components to gpr28 and gpr28+1 + SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); + } + } +} + +void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, + const Node4& components) { + // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half + // float instruction). + + Node4 values; + u32 dest_elem = 0; + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) + continue; + values[dest_elem++] = components[component]; + } + if (dest_elem == 0) + return; + + std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); + + const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); + if (dest_elem <= 2) { + SetRegister(bb, instr.gpr0, first_value); + return; + } + + SetTemporal(bb, 0, first_value); + SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); + + SetRegister(bb, instr.gpr0, GetTemporal(0)); + SetRegister(bb, instr.gpr28, GetTemporal(1)); +} + +Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, std::vector<Node> coords, + Node array, Node depth_compare, u32 bias_offset) { + const bool is_array = array; + const bool is_shadow = depth_compare; + + UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || + (texture_type == TextureType::TextureCube && is_array && is_shadow), + "This method is not supported."); + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); + + const bool lod_needed = process_mode == TextureProcessMode::LZ || + process_mode == TextureProcessMode::LL || + process_mode == TextureProcessMode::LLA; + + // LOD selection (either via bias or explicit textureLod) not supported in GL for + // sampler2DArrayShadow and samplerCubeArrayShadow. + const bool gl_lod_supported = + !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || + (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); + + const OperationCode read_method = + (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture; + + UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); + + Node bias = {}; + Node lod = {}; + if (process_mode != TextureProcessMode::None && gl_lod_supported) { + switch (process_mode) { + case TextureProcessMode::LZ: + lod = Immediate(0.0f); + break; + case TextureProcessMode::LB: + // If present, lod or bias are always stored in the register indexed by the gpr20 + // field with an offset depending on the usage of the other registers + bias = GetRegister(instr.gpr20.Value() + bias_offset); + break; + case TextureProcessMode::LL: + lod = GetRegister(instr.gpr20.Value() + bias_offset); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode)); + break; + } + } + + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto copy_coords = coords; + MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; + values[element] = Operation(read_method, meta, std::move(copy_coords)); + } + + return values; +} + +Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, bool depth_compare, bool is_array) { + const bool lod_bias_enabled = + (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); + + const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( + texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + + std::vector<Node> coords; + for (std::size_t i = 0; i < coord_count; ++i) { + coords.push_back(GetRegister(coord_register + i)); + } + // 1D.DC in OpenGL the 2nd component is ignored. + if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { + coords.push_back(Immediate(0.0f)); + } + + const Node array = is_array ? GetRegister(array_register) : nullptr; + + Node dc{}; + if (depth_compare) { + // Depth is always stored in the register signaled by gpr20 or in the next register if lod + // or bias are used + const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); + dc = GetRegister(depth_register); + } + + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); +} + +Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, bool depth_compare, bool is_array) { + const bool lod_bias_enabled = + (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); + + const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( + texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + const u64 last_coord_register = + (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) + ? static_cast<u64>(instr.gpr20.Value()) + : coord_register + 1; + const u32 bias_offset = coord_count > 2 ? 1 : 0; + + std::vector<Node> coords; + for (std::size_t i = 0; i < coord_count; ++i) { + const bool last = (i == (coord_count - 1)) && (coord_count > 1); + coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); + } + + const Node array = is_array ? GetRegister(array_register) : nullptr; + + Node dc{}; + if (depth_compare) { + // Depth is always stored in the register signaled by gpr20 or in the next register if lod + // or bias are used + const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); + dc = GetRegister(depth_register); + } + + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); +} + +Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, + bool is_array) { + const std::size_t coord_count = GetCoordCount(texture_type); + const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); + const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); + + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + + std::vector<Node> coords; + for (size_t i = 0; i < coord_count; ++i) + coords.push_back(GetRegister(coord_register + i)); + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto coords_copy = coords; + MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; + values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); + } + + return values; +} + +Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { + const std::size_t type_coord_count = GetCoordCount(texture_type); + const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; + + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // if is array gpr20 is used + const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); + + const u64 last_coord_register = + ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array + ? static_cast<u64>(instr.gpr20.Value()) + : coord_register + 1; + + std::vector<Node> coords; + for (std::size_t i = 0; i < type_coord_count; ++i) { + const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); + coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); + } + + const Node array = is_array ? GetRegister(array_register) : nullptr; + // When lod is used always is in gpr20 + const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto coords_copy = coords; + MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; + values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); + } + return values; +} + +std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( + TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, + std::size_t max_coords, std::size_t max_inputs) { + const std::size_t coord_count = GetCoordCount(texture_type); + + std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); + const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); + if (total_coord_count > max_coords || total_reg_count > max_inputs) { + UNIMPLEMENTED_MSG("Unsupported Texture operation"); + total_coord_count = std::min(total_coord_count, max_coords); + } + // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. + total_coord_count += + (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; + + return {coord_count, total_coord_count}; +} + +} // namespace VideoCommon::Shader
\ No newline at end of file diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index c3432356d..956c01d9b 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -15,7 +15,7 @@ using Tegra::Shader::Pred; using Tegra::Shader::VideoType; using Tegra::Shader::VmadShr; -u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 9cb864500..c34843307 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -12,7 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; -u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) { +u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index d7747103e..ac5112d78 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -337,27 +337,27 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) { } } -void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) { +void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); } -void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) { +void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); } -void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) { +void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); } -void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) { +void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); } -void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) { +void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { SetRegister(bb, Register::ZeroIndex + 1 + id, value); } -void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) { +void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { if (!sets_cc) { return; } @@ -366,7 +366,7 @@ void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_c LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); } -void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) { +void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { if (!sets_cc) { return; } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 6e42e3dfb..5bc3a3900 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -39,7 +39,7 @@ using NodeData = PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; using Node = const NodeData*; using Node4 = std::array<Node, 4>; -using BasicBlock = std::vector<Node>; +using NodeBlock = std::vector<Node>; constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; @@ -156,12 +156,12 @@ enum class OperationCode { Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - F4Texture, /// (MetaTexture, float[N] coords, float[M] params) -> float4 - F4TextureLod, /// (MetaTexture, float[N] coords, float[M] params) -> float4 - F4TextureGather, /// (MetaTexture, float[N] coords, float[M] params) -> float4 - F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - F4TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 - F4TexelFetch, /// (MetaTexture, int[N], int) -> float4 + Texture, /// (MetaTexture, float[N] coords) -> float4 + TextureLod, /// (MetaTexture, float[N] coords) -> float4 + TextureGather, /// (MetaTexture, float[N] coords) -> float4 + TextureQueryDimensions, /// (MetaTexture, float a) -> float4 + TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 + TexelFetch, /// (MetaTexture, int[N], int) -> float4 Branch, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void @@ -236,6 +236,11 @@ private: class ConstBuffer { public: + explicit ConstBuffer(u32 max_offset, bool is_indirect) + : max_offset{max_offset}, is_indirect{is_indirect} {} + + ConstBuffer() = default; + void MarkAsUsed(u64 offset) { max_offset = std::max(max_offset, static_cast<u32>(offset)); } @@ -252,6 +257,10 @@ public: return max_offset + sizeof(float); } + u32 GetMaxOffset() const { + return max_offset; + } + private: u32 max_offset{}; bool is_indirect{}; @@ -279,9 +288,12 @@ struct MetaHalfArithmetic { struct MetaTexture { const Sampler& sampler; + Node array{}; + Node depth_compare{}; + Node bias{}; + Node lod{}; + Node component{}; u32 element{}; - u32 coords_count{}; - std::optional<u32> array_index; }; constexpr MetaArithmetic PRECISE = {true}; @@ -530,7 +542,7 @@ public: Decode(); } - const std::map<u32, BasicBlock>& GetBasicBlocks() const { + const std::map<u32, NodeBlock>& GetBasicBlocks() const { return basic_blocks; } @@ -581,7 +593,7 @@ private: ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels); - BasicBlock DecodeRange(u32 begin, u32 end); + NodeBlock DecodeRange(u32 begin, u32 end); /** * Decodes a single instruction from Tegra to IR. @@ -589,33 +601,34 @@ private: * @param pc Program counter. Offset to decode. * @return Next address to decode. */ - u32 DecodeInstr(BasicBlock& bb, u32 pc); - - u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc); - u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc); + u32 DecodeInstr(NodeBlock& bb, u32 pc); + + u32 DecodeArithmetic(NodeBlock& bb, u32 pc); + u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); + u32 DecodeBfe(NodeBlock& bb, u32 pc); + u32 DecodeBfi(NodeBlock& bb, u32 pc); + u32 DecodeShift(NodeBlock& bb, u32 pc); + u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); + u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); + u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); + u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); + u32 DecodeFfma(NodeBlock& bb, u32 pc); + u32 DecodeHfma2(NodeBlock& bb, u32 pc); + u32 DecodeConversion(NodeBlock& bb, u32 pc); + u32 DecodeMemory(NodeBlock& bb, u32 pc); + u32 DecodeTexture(NodeBlock& bb, u32 pc); + u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); + u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); + u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); + u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); + u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); + u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); + u32 DecodeFloatSet(NodeBlock& bb, u32 pc); + u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); + u32 DecodeHalfSet(NodeBlock& bb, u32 pc); + u32 DecodeVideo(NodeBlock& bb, u32 pc); + u32 DecodeXmad(NodeBlock& bb, u32 pc); + u32 DecodeOther(NodeBlock& bb, u32 pc); /// Internalizes node's data and returns a managed pointer to a clone of that node Node StoreNode(NodeData&& node_data); @@ -664,20 +677,20 @@ private: Node GetTemporal(u32 id); /// Sets a register. src value must be a number-evaluated node. - void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src); + void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); /// Sets a predicate. src value must be a bool-evaluated node - void SetPredicate(BasicBlock& bb, u64 dest, Node src); + void SetPredicate(NodeBlock& bb, u64 dest, Node src); /// Sets an internal flag. src value must be a bool-evaluated node - void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value); + void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); /// Sets a local memory address. address and value must be a number-evaluated node - void SetLocalMemory(BasicBlock& bb, Node address, Node value); + void SetLocalMemory(NodeBlock& bb, Node address, Node value); /// Sets a temporal. Internally it uses a post-RZ register - void SetTemporal(BasicBlock& bb, u32 id, Node value); + void SetTemporal(NodeBlock& bb, u32 id, Node value); /// Sets internal flags from a float - void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true); + void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); /// Sets internal flags from an integer - void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true); + void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); /// Conditionally absolute/negated float. Absolute is applied first Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); @@ -718,12 +731,12 @@ private: /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); - void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, + void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); - void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, + void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); - void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, + void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, @@ -745,23 +758,22 @@ private: bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array, std::size_t array_offset, std::size_t bias_offset, - std::vector<Node>&& coords); + Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, + Node array, Node depth_compare, u32 bias_offset); Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); - void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest, + void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, Tegra::Shader::PredicateResultMode predicate_mode, Tegra::Shader::Pred predicate, bool sets_cc); - void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, + void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, Node op_c, Node imm_lut, bool sets_cc); - Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor); + Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); - std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor); + std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); template <typename... T> Node Operation(OperationCode code, const T*... operands) { @@ -803,7 +815,8 @@ private: u32 coverage_end{}; std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; - std::map<u32, BasicBlock> basic_blocks; + std::map<u32, NodeBlock> basic_blocks; + NodeBlock global_code; std::vector<std::unique_ptr<NodeData>> stored_nodes; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index d6d29ee9f..33b071747 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -11,7 +11,7 @@ namespace VideoCommon::Shader { namespace { -std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor, +std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, OperationCode operation_code) { for (; cursor >= 0; --cursor) { const Node node = code[cursor]; @@ -19,12 +19,19 @@ std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor, if (operation->GetCode() == operation_code) return {node, cursor}; } + if (const auto conditional = std::get_if<ConditionalNode>(node)) { + const auto& conditional_code = conditional->GetCode(); + const auto [found, internal_cursor] = FindOperation( + conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); + if (found) + return {found, cursor}; + } } return {}; } } // namespace -Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) { +Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { if (const auto cbuf = std::get_if<CbufNode>(tracked)) { // Cbuf found, but it has to be immediate return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; @@ -50,10 +57,14 @@ Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) { } return nullptr; } + if (const auto conditional = std::get_if<ConditionalNode>(tracked)) { + const auto& conditional_code = conditional->GetCode(); + return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); + } return nullptr; } -std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code, +std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor) { for (; cursor >= 0; --cursor) { const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); |
