diff options
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/control_flow.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/shader/decode/image.cpp | 11 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 100 | ||||
| -rw-r--r-- | src/video_core/shader/decode/shift.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 14 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 7 |
7 files changed, 89 insertions, 57 deletions
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 2e2711350..6d313963a 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -484,17 +484,17 @@ bool TryInspectAddress(CFGRebuildState& state) { } case BlockCollision::Inside: { // This case is the tricky one: - // We need to Split the block in 2 sepparate blocks + // We need to split the block into 2 separate blocks const u32 end = state.block_info[block_index].end; BlockInfo& new_block = CreateBlockInfo(state, address, end); BlockInfo& current_block = state.block_info[block_index]; current_block.end = address - 1; - new_block.branch = current_block.branch; + new_block.branch = std::move(current_block.branch); BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); const auto branch = std::get_if<SingleBranch>(forward_branch.get()); branch->address = address; branch->ignore = true; - current_block.branch = forward_branch; + current_block.branch = std::move(forward_branch); return true; } default: diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 0dd7a1196..85ee9aa5e 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -352,8 +352,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); } else { const Node image_register = GetRegister(instr.gpr39); - const auto [base_image, buffer, offset] = TrackCbuf( - image_register, global_code, static_cast<s64>(global_code.size())); + const auto result = TrackCbuf(image_register, global_code, + static_cast<s64>(global_code.size())); + const auto buffer = std::get<1>(result); + const auto offset = std::get<2>(result); descriptor = registry.ObtainBindlessSampler(buffer, offset); } if (!descriptor) { @@ -497,9 +499,12 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { const Node image_register = GetRegister(reg); - const auto [base_image, buffer, offset] = + const auto result = TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); + const auto buffer = std::get<1>(result); + const auto offset = std::get<2>(result); + const auto it = std::find_if(std::begin(used_images), std::end(used_images), [buffer = buffer, offset = offset](const Image& entry) { diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index b8f63922f..8112ead3e 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -3,7 +3,9 @@ // Refer to the license.txt file included. #include <algorithm> +#include <utility> #include <vector> + #include <fmt/format.h> #include "common/alignment.h" @@ -16,6 +18,7 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; @@ -27,29 +30,26 @@ using Tegra::Shader::StoreType; namespace { -Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { - const OperationCode operation_code = [op] { - switch (op) { - case AtomicOp::Add: - return OperationCode::AtomicIAdd; - case AtomicOp::Min: - return OperationCode::AtomicIMin; - case AtomicOp::Max: - return OperationCode::AtomicIMax; - case AtomicOp::And: - return OperationCode::AtomicIAnd; - case AtomicOp::Or: - return OperationCode::AtomicIOr; - case AtomicOp::Xor: - return OperationCode::AtomicIXor; - case AtomicOp::Exch: - return OperationCode::AtomicIExchange; - default: - UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); - return OperationCode::AtomicIAdd; - } - }(); - return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data)); +OperationCode GetAtomOperation(AtomicOp op) { + switch (op) { + case AtomicOp::Add: + return OperationCode::AtomicIAdd; + case AtomicOp::Min: + return OperationCode::AtomicIMin; + case AtomicOp::Max: + return OperationCode::AtomicIMax; + case AtomicOp::And: + return OperationCode::AtomicIAnd; + case AtomicOp::Or: + return OperationCode::AtomicIOr; + case AtomicOp::Xor: + return OperationCode::AtomicIXor; + case AtomicOp::Exch: + return OperationCode::AtomicIExchange; + default: + UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); + return OperationCode::AtomicIAdd; + } } bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { @@ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), - Immediate(size)); + offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); } Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { - Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); - offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); - return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), - std::move(offset), Immediate(size)); + Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), + Immediate(size)); } Node Sign16Extend(Node value) { Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); - Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); + Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); - return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); + return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); } } // Anonymous namespace @@ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { if (IsUnaligned(type)) { const u32 mask = GetUnalignedMask(type); - value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); + value = InsertUnaligned(gmem, move(value), real_address, mask, size); } bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } + case OpCode::Id::RED: { + UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); + UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add); + const auto [real_address, base_address, descriptor] = + TrackGlobalMemory(bb, instr, true, true); + if (!real_address || !base_address) { + // Tracking failed, skip atomic. + break; + } + Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + Node value = GetRegister(instr.gpr0); + bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value))); + break; + } case OpCode::Id::ATOM: { UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || instr.atom.operation == AtomicOp::Dec || instr.atom.operation == AtomicOp::SafeAdd, "operation={}", static_cast<int>(instr.atom.operation.Value())); UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || - instr.atom.type == GlobalAtomicType::U64, + instr.atom.type == GlobalAtomicType::U64 || + instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || + instr.atom.type == GlobalAtomicType::F32_FTZ_RN, "type={}", static_cast<int>(instr.atom.type.Value())); const auto [real_address, base_address, descriptor] = @@ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } const bool is_signed = - instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; + instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem, - GetRegister(instr.gpr20)); - SetRegister(bb, instr.gpr0, std::move(value)); + SetRegister(bb, instr.gpr0, + SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, + GetRegister(instr.gpr20))); break; } case OpCode::Id::ATOMS: { @@ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; const s32 offset = instr.atoms.GetImmediateOffset(); Node address = GetRegister(instr.gpr8); - address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); - Node value = - GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed, - GetSharedMemory(std::move(address)), GetRegister(instr.gpr20)); - SetRegister(bb, instr.gpr0, std::move(value)); + address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); + SetRegister(bb, instr.gpr0, + SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, + GetSharedMemory(move(address)), GetRegister(instr.gpr20))); break; } case OpCode::Id::AL2P: { diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 3b391d3e6..d4ffa8014 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -23,7 +23,6 @@ Node IsFull(Node shift) { } Node Shift(OperationCode opcode, Node value, Node shift) { - Node is_full = Operation(OperationCode::LogicalIEqual, shift, Immediate(32)); Node shifted = Operation(opcode, move(value), shift); return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); } diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 5fcc9da60..3eee961f5 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -178,6 +178,20 @@ enum class OperationCode { AtomicIOr, /// (memory, int) -> int AtomicIXor, /// (memory, int) -> int + ReduceUAdd, /// (memory, uint) -> void + ReduceUMin, /// (memory, uint) -> void + ReduceUMax, /// (memory, uint) -> void + ReduceUAnd, /// (memory, uint) -> void + ReduceUOr, /// (memory, uint) -> void + ReduceUXor, /// (memory, uint) -> void + + ReduceIAdd, /// (memory, int) -> void + ReduceIMin, /// (memory, int) -> void + ReduceIMax, /// (memory, int) -> void + ReduceIAnd, /// (memory, int) -> void + ReduceIOr, /// (memory, int) -> void + ReduceIXor, /// (memory, int) -> void + Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 8852c8a1b..822674926 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -56,8 +56,7 @@ Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { const auto index = static_cast<u32>(index_); const auto offset = static_cast<u32>(offset_); - const auto [entry, is_new] = used_cbufs.try_emplace(index); - entry->second.MarkAsUsed(offset); + used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); return MakeNode<CbufNode>(index, Immediate(offset)); } @@ -66,8 +65,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { const auto index = static_cast<u32>(index_); const auto offset = static_cast<u32>(offset_); - const auto [entry, is_new] = used_cbufs.try_emplace(index); - entry->second.MarkAsUsedIndirect(); + used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); Node final_offset = [&] { // Attempt to inline constant buffer without a variable offset. This is done to allow @@ -166,6 +164,7 @@ Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signe std::move(value), Immediate(16)); value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, std::move(value), Immediate(16)); + return value; case Register::Size::Word: // Default - do nothing return value; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 10739b37d..224943ad9 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -27,8 +27,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { const auto& conditional_code = conditional->GetCode(); - auto [found, internal_cursor] = FindOperation( + auto result = FindOperation( conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); + auto& found = result.first; if (found) { return {std::move(found), cursor}; } @@ -186,8 +187,8 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register // that it uses as operand - const auto [found, found_cursor] = - TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + const auto& found = result.first; if (!found) { return {}; } |
