diff options
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/decode.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/shader/decode/half_set.cpp | 88 | ||||
| -rw-r--r-- | src/video_core/shader/decode/image.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/shader/decode/other.cpp | 42 | ||||
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 55 | ||||
| -rw-r--r-- | src/video_core/shader/decode/xmad.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/shader/memory_util.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 105 | ||||
| -rw-r--r-- | src/video_core/shader/node_helper.h | 2 | ||||
| -rw-r--r-- | src/video_core/shader/registry.cpp | 20 | ||||
| -rw-r--r-- | src/video_core/shader/registry.h | 35 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 109 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 14 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 78 |
15 files changed, 385 insertions, 210 deletions
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index a75a5cc63..eeac328a6 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -255,7 +255,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); Node op_b = Immediate(branch_case.cmp_value); Node condition = - GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); + GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); auto result = Conditional(condition, {n}); bb.push_back(result); global_code.push_back(result); diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 848e46874..b2e88fa20 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -13,55 +13,101 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::PredCondition; u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - if (instr.hset2.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + PredCondition cond; + bool bf; + bool ftz; + bool neg_a; + bool abs_a; + bool neg_b; + bool abs_b; + switch (opcode->get().GetId()) { + case OpCode::Id::HSET2_C: + case OpCode::Id::HSET2_IMM: + cond = instr.hsetp2.cbuf_and_imm.cond; + bf = instr.Bit(53); + ftz = instr.Bit(54); + neg_a = instr.Bit(43); + abs_a = instr.Bit(44); + neg_b = instr.Bit(56); + abs_b = instr.Bit(54); + break; + case OpCode::Id::HSET2_R: + cond = instr.hsetp2.reg.cond; + bf = instr.Bit(49); + ftz = instr.Bit(50); + neg_a = instr.Bit(43); + abs_a = instr.Bit(44); + neg_b = instr.Bit(31); + abs_b = instr.Bit(30); + break; + default: + UNREACHABLE(); } - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); - - Node op_b = [&]() { + Node op_b = [this, instr, opcode] { switch (opcode->get().GetId()) { + case OpCode::Id::HSET2_C: + // Inform as unimplemented as this is not tested. + UNIMPLEMENTED_MSG("HSET2_C is not implemented"); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); case OpCode::Id::HSET2_R: return GetRegister(instr.gpr20); + case OpCode::Id::HSET2_IMM: + return UnpackHalfImmediate(instr, true); default: UNREACHABLE(); - return Immediate(0); + return Node{}; } }(); - op_b = UnpackHalfFloat(op_b, instr.hset2.type_b); - op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); - const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); + if (!ftz) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + } + + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); + op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); + + switch (opcode->get().GetId()) { + case OpCode::Id::HSET2_R: + op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); + [[fallthrough]]; + case OpCode::Id::HSET2_C: + op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); + break; + default: + break; + } - const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); + Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); + + Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); // HSET2 operates on each half float in the pack. std::array<Node, 2> values; for (u32 i = 0; i < 2; ++i) { - const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; - const Node true_value = Immediate(raw_value << (i * 16)); - const Node false_value = Immediate(0); - - const Node comparison = - Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); - const Node predicate = Operation(combiner, comparison, second_pred); + const u32 raw_value = bf ? 0x3c00 : 0xffff; + Node true_value = Immediate(raw_value << (i * 16)); + Node false_value = Immediate(0); + Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); + Node predicate = Operation(combiner, comparison, second_pred); values[i] = - Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); + Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); } - const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); - SetRegister(bb, instr.gpr0, value); + Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); + SetRegister(bb, instr.gpr0, move(value)); return pc; } diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 60b6ad72a..07778dc3e 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -97,6 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, break; case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: + case TextureFormat::BF10GF11RF11: if (component == 0) { return descriptor.b_type; } @@ -119,7 +120,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, } break; } - UNIMPLEMENTED_MSG("texture format not implement={}", format); + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); return ComponentType::FLOAT; } @@ -191,6 +192,14 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 6; } return 0; + case TextureFormat::BF10GF11RF11: + if (component == 1 || component == 2) { + return 11; + } + if (component == 0) { + return 10; + } + return 0; case TextureFormat::G8R24: if (component == 0) { return 8; @@ -211,10 +220,9 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return (component == 0 || component == 1) ? 8 : 0; case TextureFormat::G4R4: return (component == 0 || component == 1) ? 4 : 0; - default: - UNIMPLEMENTED_MSG("texture format not implement={}", format); - return 0; } + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); + return 0; } std::size_t GetImageComponentMask(TextureFormat format) { @@ -235,6 +243,7 @@ std::size_t GetImageComponentMask(TextureFormat format) { case TextureFormat::R32_B24G8: case TextureFormat::B5G6R5: case TextureFormat::B6G5R5: + case TextureFormat::BF10GF11RF11: return std::size_t{R | G | B}; case TextureFormat::R32_G32: case TextureFormat::R16_G16: @@ -248,10 +257,9 @@ std::size_t GetImageComponentMask(TextureFormat format) { case TextureFormat::R8: case TextureFormat::R1: return std::size_t{R}; - default: - UNIMPLEMENTED_MSG("texture format not implement={}", format); - return std::size_t{R | G | B | A}; } + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); + return std::size_t{R | G | B | A}; } std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { @@ -299,7 +307,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, return {std::move(original_value), true}; } default: - UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); + UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); return {std::move(original_value), true}; } } @@ -459,7 +467,7 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { default: break; } - UNIMPLEMENTED_MSG("Unimplemented operation={} type={}", + UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", static_cast<u64>(instr.suatom_d.operation.Value()), static_cast<u64>(instr.suatom_d.operation_type.Value())); return OperationCode::AtomicImageAdd; diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 9392f065b..63adbc4a3 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -387,7 +387,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } case OpCode::Id::RED: { UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); - UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); if (!real_address || !base_address) { @@ -396,7 +395,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); Node value = GetRegister(instr.gpr0); - bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value))); + bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); break; } case OpCode::Id::ATOM: { diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d4f95b18c..c0a8f233f 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -83,7 +83,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { return Operation(OperationCode::YNegate); case SystemVariable::InvocationInfo: LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); - return Immediate(0U); + return Immediate(0x00ff'0000U); case SystemVariable::WscaleFactorXY: UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); return Immediate(0U); @@ -109,6 +109,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { return Operation(OperationCode::WorkGroupIdY); case SystemVariable::CtaIdZ: return Operation(OperationCode::WorkGroupIdZ); + case SystemVariable::EqMask: + case SystemVariable::LtMask: + case SystemVariable::LeMask: + case SystemVariable::GtMask: + case SystemVariable::GeMask: + uses_warps = true; + switch (instr.sys20) { + case SystemVariable::EqMask: + return Operation(OperationCode::ThreadEqMask); + case SystemVariable::LtMask: + return Operation(OperationCode::ThreadLtMask); + case SystemVariable::LeMask: + return Operation(OperationCode::ThreadLeMask); + case SystemVariable::GtMask: + return Operation(OperationCode::ThreadGtMask); + case SystemVariable::GeMask: + return Operation(OperationCode::ThreadGeMask); + default: + UNREACHABLE(); + return Immediate(0u); + } default: UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value())); @@ -272,10 +293,25 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); break; } + case OpCode::Id::BAR: { + UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); + bb.push_back(Operation(OperationCode::Barrier)); + break; + } case OpCode::Id::MEMBAR: { - UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL); UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); - bb.push_back(Operation(OperationCode::MemoryBarrierGL)); + const OperationCode type = [instr] { + switch (instr.membar.type) { + case Tegra::Shader::MembarType::CTA: + return OperationCode::MemoryBarrierGroup; + case Tegra::Shader::MembarType::GL: + return OperationCode::MemoryBarrierGlobal; + default: + UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value())); + return OperationCode::MemoryBarrierGlobal; + } + }(); + bb.push_back(Operation(type)); break; } case OpCode::Id::DEPBAR: { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 8f0bb996e..29ebf65ba 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { return pc; } -ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, - std::optional<u32> buffer) { +ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( + SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) { if (info.IsComplete()) { return info; } - const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset) - : registry.ObtainBoundSampler(offset); if (!sampler) { LOG_WARNING(HW_GPU, "Unknown sampler info"); info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); @@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo sampler_info) { - const auto offset = static_cast<u32>(sampler.index.Value()); - const auto info = GetSamplerInfo(sampler_info, offset); + const u32 offset = static_cast<u32>(sampler.index.Value()); + const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); // If this sampler has already been used, return the existing mapping. const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), @@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, const Node sampler_register = GetRegister(reg); const auto [base_node, tracked_sampler_info] = TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); - ASSERT(base_node != nullptr); - if (base_node == nullptr) { + if (!base_node) { + UNREACHABLE(); return std::nullopt; } - if (const auto bindless_sampler_info = - std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { - const u32 buffer = bindless_sampler_info->GetIndex(); - const u32 offset = bindless_sampler_info->GetOffset(); - info = GetSamplerInfo(info, offset, buffer); + if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { + const u32 buffer = sampler_info->index; + const u32 offset = sampler_info->offset; + info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); // If this sampler has already been used, return the existing mapping. const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer = buffer, offset = offset](const Sampler& entry) { + [buffer, offset](const Sampler& entry) { return entry.buffer == buffer && entry.offset == offset; }); if (it != used_samplers.end()) { @@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, *info.is_shadow, *info.is_buffer, false); } - if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { - const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; - index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); - info = GetSamplerInfo(info, base_offset); + if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) { + const std::pair indices = sampler_info->indices; + const std::pair offsets = sampler_info->offsets; + info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); + + // Try to use an already created sampler if it exists + const auto it = std::find_if( + used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) { + return offsets == std::pair{entry.offset, entry.secondary_offset} && + indices == std::pair{entry.buffer, entry.secondary_buffer}; + }); + if (it != used_samplers.end()) { + ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && + it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); + return *it; + } + + // Otherwise create a new mapping for this sampler + const u32 next_index = static_cast<u32>(used_samplers.size()); + return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, + *info.is_shadow, *info.is_buffer); + } + if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { + const u32 base_offset = sampler_info->base_offset / 4; + index_var = GetCustomVariable(sampler_info->bindless_var); + info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); // If this sampler has already been used, return the existing mapping. const auto it = std::find_if( diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 6191ffba1..c83dc6615 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -97,19 +97,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b); } case Tegra::Shader::XmadMode::CSfu: { - const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a, - op_a, Immediate(0)); - const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b, - op_b, Immediate(0)); + const Node comp_a = + GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); + const Node comp_b = + GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); const Node comp_minus_a = GetPredicateComparisonInteger( - PredCondition::NotEqual, is_signed_a, + PredCondition::NE, is_signed_a, SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, Immediate(0x80000000)), Immediate(0)); const Node comp_minus_b = GetPredicateComparisonInteger( - PredCondition::NotEqual, is_signed_b, + PredCondition::NE, is_signed_b, SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, Immediate(0x80000000)), Immediate(0)); diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp index 074f21691..5071c83ca 100644 --- a/src/video_core/shader/memory_util.cpp +++ b/src/video_core/shader/memory_util.cpp @@ -66,12 +66,12 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_add u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, const ProgramCode& code_b) { - u64 unique_identifier = boost::hash_value(code); + size_t unique_identifier = boost::hash_value(code); if (is_a) { // VertexA programs include two programs boost::hash_combine(unique_identifier, boost::hash_value(code_b)); } - return unique_identifier; + return static_cast<u64>(unique_identifier); } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 601c822d2..8f230d57a 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -110,13 +110,20 @@ enum class OperationCode { LogicalPick2, /// (bool2 pair, uint index) -> bool LogicalAnd2, /// (bool2 a) -> bool - LogicalFLessThan, /// (float a, float b) -> bool - LogicalFEqual, /// (float a, float b) -> bool - LogicalFLessEqual, /// (float a, float b) -> bool - LogicalFGreaterThan, /// (float a, float b) -> bool - LogicalFNotEqual, /// (float a, float b) -> bool - LogicalFGreaterEqual, /// (float a, float b) -> bool - LogicalFIsNan, /// (float a) -> bool + LogicalFOrdLessThan, /// (float a, float b) -> bool + LogicalFOrdEqual, /// (float a, float b) -> bool + LogicalFOrdLessEqual, /// (float a, float b) -> bool + LogicalFOrdGreaterThan, /// (float a, float b) -> bool + LogicalFOrdNotEqual, /// (float a, float b) -> bool + LogicalFOrdGreaterEqual, /// (float a, float b) -> bool + LogicalFOrdered, /// (float a, float b) -> bool + LogicalFUnordered, /// (float a, float b) -> bool + LogicalFUnordLessThan, /// (float a, float b) -> bool + LogicalFUnordEqual, /// (float a, float b) -> bool + LogicalFUnordLessEqual, /// (float a, float b) -> bool + LogicalFUnordGreaterThan, /// (float a, float b) -> bool + LogicalFUnordNotEqual, /// (float a, float b) -> bool + LogicalFUnordGreaterEqual, /// (float a, float b) -> bool LogicalILessThan, /// (int a, int b) -> bool LogicalIEqual, /// (int a, int b) -> bool @@ -219,9 +226,16 @@ enum class OperationCode { VoteEqual, /// (bool) -> bool ThreadId, /// () -> uint + ThreadEqMask, /// () -> uint + ThreadGeMask, /// () -> uint + ThreadGtMask, /// () -> uint + ThreadLeMask, /// () -> uint + ThreadLtMask, /// () -> uint ShuffleIndexed, /// (uint value, uint index) -> uint - MemoryBarrierGL, /// () -> void + Barrier, /// () -> void + MemoryBarrierGroup, /// () -> void + MemoryBarrierGlobal, /// () -> void Amount, }; @@ -261,10 +275,11 @@ using Node = std::shared_ptr<NodeData>; using Node4 = std::array<Node, 4>; using NodeBlock = std::vector<Node>; -class BindlessSamplerNode; -class ArraySamplerNode; +struct ArraySamplerNode; +struct BindlessSamplerNode; +struct SeparateSamplerNode; -using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; +using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; using TrackSampler = std::shared_ptr<TrackSamplerData>; struct Sampler { @@ -274,63 +289,51 @@ struct Sampler { : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, is_buffer{is_buffer}, is_indexed{is_indexed} {} + /// Separate sampler constructor + constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, + Tegra::Shader::TextureType type, bool is_array, bool is_shadow, + bool is_buffer) + : index{index}, offset{offsets.first}, secondary_offset{offsets.second}, + buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array}, + is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {} + /// Bindless samplers constructor constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} - u32 index = 0; ///< Emulated index given for the this sampler. - u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. - u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers). - u32 size = 1; ///< Size of the sampler. + u32 index = 0; ///< Emulated index given for the this sampler. + u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. + u32 secondary_offset = 0; ///< Secondary offset in the const buffer. + u32 buffer = 0; ///< Buffer where the bindless sampler is read. + u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. + u32 size = 1; ///< Size of the sampler. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. - bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. - bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. - bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. + bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. + bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. + bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. + bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. + bool is_separated = false; ///< Whether the image and sampler is separated or not. }; /// Represents a tracked bindless sampler into a direct const buffer -class ArraySamplerNode final { -public: - explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) - : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} - - constexpr u32 GetIndex() const { - return index; - } - - constexpr u32 GetBaseOffset() const { - return base_offset; - } - - constexpr u32 GetIndexVar() const { - return bindless_var; - } - -private: +struct ArraySamplerNode { u32 index; u32 base_offset; u32 bindless_var; }; -/// Represents a tracked bindless sampler into a direct const buffer -class BindlessSamplerNode final { -public: - explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} - - constexpr u32 GetIndex() const { - return index; - } - - constexpr u32 GetOffset() const { - return offset; - } +/// Represents a tracked separate sampler image pair that was folded statically +struct SeparateSamplerNode { + std::pair<u32, u32> indices; + std::pair<u32, u32> offsets; +}; -private: +/// Represents a tracked bindless sampler into a direct const buffer +struct BindlessSamplerNode { u32 index; u32 offset; }; diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 11231bbea..1e0886185 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h @@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) { template <typename T, typename... Args> TrackSampler MakeTrackSampler(Args&&... args) { static_assert(std::is_convertible_v<T, TrackSamplerData>); - return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); + return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...}); } template <typename... Args> diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index af70b3f35..cdf274e54 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { return value; } +std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler( + std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) { + SeparateSamplerKey key; + key.buffers = buffers; + key.offsets = offsets; + const auto iter = separate_samplers.find(key); + if (iter != separate_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + + const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); + const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); + const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); + separate_samplers.emplace(key, value); + return value; +} + std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { const std::pair key = {buffer, offset}; diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index 0c80d35fd..231206765 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -19,8 +19,39 @@ namespace VideoCommon::Shader { +struct SeparateSamplerKey { + std::pair<u32, u32> buffers; + std::pair<u32, u32> offsets; +}; + +} // namespace VideoCommon::Shader + +namespace std { + +template <> +struct hash<VideoCommon::Shader::SeparateSamplerKey> { + std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { + return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ + key.offsets.second); + } +}; + +template <> +struct equal_to<VideoCommon::Shader::SeparateSamplerKey> { + bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, + const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { + return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; + } +}; + +} // namespace std + +namespace VideoCommon::Shader { + using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; +using SeparateSamplerMap = + std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>; using BindlessSamplerMap = std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; @@ -73,6 +104,9 @@ public: std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); + std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler( + std::pair<u32, u32> buffers, std::pair<u32, u32> offsets); + std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); /// Inserts a key. @@ -128,6 +162,7 @@ private: Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; KeyMap keys; BoundSamplerMap bound_samplers; + SeparateSamplerMap separate_samplers; BindlessSamplerMap bindless_samplers; u32 bound_buffer; GraphicsInfo graphics_info; diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 822674926..e322c3402 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -10,6 +10,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" @@ -243,56 +244,44 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { } Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { + if (condition == PredCondition::T) { + return GetPredicate(true); + } else if (condition == PredCondition::F) { + return GetPredicate(false); + } + static constexpr std::array comparison_table{ - std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan}, - std::pair{PredCondition::Equal, OperationCode::LogicalFEqual}, - std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, - std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, - std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, - std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, - std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, - std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, - std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, - std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, - std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}, + OperationCode(0), + OperationCode::LogicalFOrdLessThan, // LT + OperationCode::LogicalFOrdEqual, // EQ + OperationCode::LogicalFOrdLessEqual, // LE + OperationCode::LogicalFOrdGreaterThan, // GT + OperationCode::LogicalFOrdNotEqual, // NE + OperationCode::LogicalFOrdGreaterEqual, // GE + OperationCode::LogicalFOrdered, // NUM + OperationCode::LogicalFUnordered, // NAN + OperationCode::LogicalFUnordLessThan, // LTU + OperationCode::LogicalFUnordEqual, // EQU + OperationCode::LogicalFUnordLessEqual, // LEU + OperationCode::LogicalFUnordGreaterThan, // GTU + OperationCode::LogicalFUnordNotEqual, // NEU + OperationCode::LogicalFUnordGreaterEqual, // GEU }; + const std::size_t index = static_cast<std::size_t>(condition); + ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); - const auto comparison = - std::find_if(comparison_table.cbegin(), comparison_table.cend(), - [condition](const auto entry) { return condition == entry.first; }); - UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), - "Unknown predicate comparison operation"); - - Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); - - if (condition == PredCondition::LessThanWithNan || - condition == PredCondition::NotEqualWithNan || - condition == PredCondition::LessEqualWithNan || - condition == PredCondition::GreaterThanWithNan || - condition == PredCondition::GreaterEqualWithNan) { - predicate = Operation(OperationCode::LogicalOr, predicate, - Operation(OperationCode::LogicalFIsNan, op_a)); - predicate = Operation(OperationCode::LogicalOr, predicate, - Operation(OperationCode::LogicalFIsNan, op_b)); - } - - return predicate; + return Operation(comparison_table[index], op_a, op_b); } Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, Node op_b) { static constexpr std::array comparison_table{ - std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan}, - std::pair{PredCondition::Equal, OperationCode::LogicalIEqual}, - std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual}, - std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, - std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual}, - std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, - std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, - std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, - std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, - std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, - std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}, + std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, + std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, + std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, + std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, + std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, + std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, }; const auto comparison = @@ -301,32 +290,24 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), "Unknown predicate comparison operation"); - Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), - std::move(op_b)); - - UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || - condition == PredCondition::NotEqualWithNan || - condition == PredCondition::LessEqualWithNan || - condition == PredCondition::GreaterThanWithNan || - condition == PredCondition::GreaterEqualWithNan, - "NaN comparisons for integers are not implemented"); - return predicate; + return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), + std::move(op_b)); } Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b) { static constexpr std::array comparison_table{ - std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan}, - std::pair{PredCondition::Equal, OperationCode::Logical2HEqual}, - std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, - std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, - std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, - std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, - std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, - std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, - std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, - std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, - std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}, + std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, + std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, + std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual}, + std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, + std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, + std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, + std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, + std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, + std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, + std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, + std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, }; const auto comparison = @@ -397,7 +378,7 @@ void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc if (!sets_cc) { return; } - Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f)); + Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 15ae152f2..3a98b2104 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -330,8 +330,8 @@ private: OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); /// Queries the missing sampler info from the execution context. - SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset, - std::optional<u32> buffer = std::nullopt); + SamplerInfo GetSamplerInfo(SamplerInfo info, + std::optional<Tegra::Engines::SamplerDescriptor> sampler); /// Accesses a texture sampler. std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); @@ -409,8 +409,14 @@ private: std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor); + std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor); + + std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf, + const OperationNode& operation, + Node gpr, Node base_offset, + Node tracked, const NodeBlock& code, + s64 cursor); std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index eb97bfd41..d5ed81442 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -14,6 +14,7 @@ namespace VideoCommon::Shader { namespace { + std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, OperationCode operation_code) { for (; cursor >= 0; --cursor) { @@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) { if (const auto operation = std::get_if<OperationNode>(&*node)) { operation->SetAmendIndex(amend_index); return true; - } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { + } + if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { conditional->SetAmendIndex(amend_index); return true; } @@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) { } // Anonymous namespace -std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, - s64 cursor) { +std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor) { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { + const u32 cbuf_index = cbuf->GetIndex(); + // Constant buffer found, test if it's an immediate const auto& offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { - auto track = - MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); + auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue()); return {tracked, track}; } if (const auto operation = std::get_if<OperationNode>(&*offset)) { const u32 bound_buffer = registry.GetBoundBuffer(); - if (bound_buffer != cbuf->GetIndex()) { + if (bound_buffer != cbuf_index) { return {}; } - const auto pair = DecoupleIndirectRead(*operation); - if (!pair) { - return {}; + if (const std::optional pair = DecoupleIndirectRead(*operation)) { + auto [gpr, base_offset] = *pair; + return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, + code, cursor); } - auto [gpr, base_offset] = *pair; - const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); - const auto& gpu_driver = registry.AccessGuestDriverProfile(); - const u32 bindless_cv = NewCustomVariable(); - Node op = - Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); - - const Node cv_node = GetCustomVariable(bindless_cv); - Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); - const std::size_t amend_index = DeclareAmend(std::move(amend_op)); - AmendNodeCv(amend_index, code[cursor]); - // TODO Implement Bindless Index custom variable - auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), - offset_inm->GetValue(), bindless_cv); - return {tracked, track}; } return {}; } @@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons return TrackBindlessSampler(source, code, new_cursor); } if (const auto operation = std::get_if<OperationNode>(&*tracked)) { - for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { - if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); - std::get<0>(found)) { - // Cbuf found in operand. + const OperationNode& op = *operation; + + const OperationCode opcode = operation->GetCode(); + if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { + ASSERT(op.GetOperandsCount() == 2); + auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); + auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); + if (node_a && node_b) { + auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b}, + std::pair{offset_a, offset_b}); + return {tracked, std::move(track)}; + } + } + std::size_t i = op.GetOperandsCount(); + while (i--) { + if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { + // Constant buffer found in operand. return found; } } @@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons return {}; } +std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead( + const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, + const NodeBlock& code, s64 cursor) { + const auto offset_imm = std::get<ImmediateNode>(*base_offset); + const auto& gpu_driver = registry.AccessGuestDriverProfile(); + const u32 bindless_cv = NewCustomVariable(); + const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); + Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); + + Node cv_node = GetCustomVariable(bindless_cv); + Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); + const std::size_t amend_index = DeclareAmend(std::move(amend_op)); + AmendNodeCv(amend_index, code[cursor]); + + // TODO: Implement bindless index custom variable + auto track = + MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); + return {tracked, track}; +} + std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |
