diff options
Diffstat (limited to 'src/video_core/shader')
28 files changed, 2646 insertions, 540 deletions
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp new file mode 100644 index 000000000..3f96d9076 --- /dev/null +++ b/src/video_core/shader/ast.cpp @@ -0,0 +1,753 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> +#include <string_view> + +#include <fmt/format.h> + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/shader/ast.h" +#include "video_core/shader/expr.h" + +namespace VideoCommon::Shader { + +ASTZipper::ASTZipper() = default; + +void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) { + ASSERT(new_first->manager == nullptr); + first = new_first; + last = new_first; + + ASTNode current = first; + while (current) { + current->manager = this; + current->parent = parent; + last = current; + current = current->next; + } +} + +void ASTZipper::PushBack(const ASTNode new_node) { + ASSERT(new_node->manager == nullptr); + new_node->previous = last; + if (last) { + last->next = new_node; + } + new_node->next.reset(); + last = new_node; + if (!first) { + first = new_node; + } + new_node->manager = this; +} + +void ASTZipper::PushFront(const ASTNode new_node) { + ASSERT(new_node->manager == nullptr); + new_node->previous.reset(); + new_node->next = first; + if (first) { + first->previous = new_node; + } + if (last == first) { + last = new_node; + } + first = new_node; + new_node->manager = this; +} + +void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) { + ASSERT(new_node->manager == nullptr); + if (!at_node) { + PushFront(new_node); + return; + } + const ASTNode next = at_node->next; + if (next) { + next->previous = new_node; + } + new_node->previous = at_node; + if (at_node == last) { + last = new_node; + } + new_node->next = next; + at_node->next = new_node; + new_node->manager = this; +} + +void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) { + ASSERT(new_node->manager == nullptr); + if (!at_node) { + PushBack(new_node); + return; + } + const ASTNode previous = at_node->previous; + if (previous) { + previous->next = new_node; + } + new_node->next = at_node; + if (at_node == first) { + first = new_node; + } + new_node->previous = previous; + at_node->previous = new_node; + new_node->manager = this; +} + +void ASTZipper::DetachTail(ASTNode node) { + ASSERT(node->manager == this); + if (node == first) { + first.reset(); + last.reset(); + return; + } + + last = node->previous; + last->next.reset(); + node->previous.reset(); + + ASTNode current = std::move(node); + while (current) { + current->manager = nullptr; + current->parent.reset(); + current = current->next; + } +} + +void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) { + ASSERT(start->manager == this && end->manager == this); + if (start == end) { + DetachSingle(start); + return; + } + const ASTNode prev = start->previous; + const ASTNode post = end->next; + if (!prev) { + first = post; + } else { + prev->next = post; + } + if (!post) { + last = prev; + } else { + post->previous = prev; + } + start->previous.reset(); + end->next.reset(); + ASTNode current = start; + bool found = false; + while (current) { + current->manager = nullptr; + current->parent.reset(); + found |= current == end; + current = current->next; + } + ASSERT(found); +} + +void ASTZipper::DetachSingle(const ASTNode node) { + ASSERT(node->manager == this); + const ASTNode prev = node->previous; + const ASTNode post = node->next; + node->previous.reset(); + node->next.reset(); + if (!prev) { + first = post; + } else { + prev->next = post; + } + if (!post) { + last = prev; + } else { + post->previous = prev; + } + + node->manager = nullptr; + node->parent.reset(); +} + +void ASTZipper::Remove(const ASTNode node) { + ASSERT(node->manager == this); + const ASTNode next = node->next; + const ASTNode previous = node->previous; + if (previous) { + previous->next = next; + } + if (next) { + next->previous = previous; + } + node->parent.reset(); + node->manager = nullptr; + if (node == last) { + last = previous; + } + if (node == first) { + first = next; + } +} + +class ExprPrinter final { +public: + void operator()(const ExprAnd& expr) { + inner += "( "; + std::visit(*this, *expr.operand1); + inner += " && "; + std::visit(*this, *expr.operand2); + inner += ')'; + } + + void operator()(const ExprOr& expr) { + inner += "( "; + std::visit(*this, *expr.operand1); + inner += " || "; + std::visit(*this, *expr.operand2); + inner += ')'; + } + + void operator()(const ExprNot& expr) { + inner += "!"; + std::visit(*this, *expr.operand1); + } + + void operator()(const ExprPredicate& expr) { + inner += "P" + std::to_string(expr.predicate); + } + + void operator()(const ExprCondCode& expr) { + u32 cc = static_cast<u32>(expr.cc); + inner += "CC" + std::to_string(cc); + } + + void operator()(const ExprVar& expr) { + inner += "V" + std::to_string(expr.var_index); + } + + void operator()(const ExprBoolean& expr) { + inner += expr.value ? "true" : "false"; + } + + void operator()(const ExprGprEqual& expr) { + inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')'; + } + + const std::string& GetResult() const { + return inner; + } + +private: + std::string inner; +}; + +class ASTPrinter { +public: + void operator()(const ASTProgram& ast) { + scope++; + inner += "program {\n"; + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + inner += "}\n"; + scope--; + } + + void operator()(const ASTIfThen& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); + scope++; + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + scope--; + inner += fmt::format("{}}}\n", Indent()); + } + + void operator()(const ASTIfElse& ast) { + inner += Indent(); + inner += "else {\n"; + + scope++; + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + scope--; + + inner += Indent(); + inner += "}\n"; + } + + void operator()(const ASTBlockEncoded& ast) { + inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); + } + + void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { + inner += Indent(); + inner += "Block;\n"; + } + + void operator()(const ASTVarSet& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); + } + + void operator()(const ASTLabel& ast) { + inner += fmt::format("Label_{}:\n", ast.index); + } + + void operator()(const ASTGoto& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += + fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); + } + + void operator()(const ASTDoWhile& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += fmt::format("{}do {{\n", Indent()); + scope++; + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + scope--; + inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); + } + + void operator()(const ASTReturn& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), + ast.kills ? "discard" : "exit"); + } + + void operator()(const ASTBreak& ast) { + ExprPrinter expr_parser{}; + std::visit(expr_parser, *ast.condition); + inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); + } + + void Visit(const ASTNode& node) { + std::visit(*this, *node->GetInnerData()); + } + + const std::string& GetResult() const { + return inner; + } + +private: + std::string_view Indent() { + if (space_segment_scope == scope) { + return space_segment; + } + + // Ensure that we don't exceed our view. + ASSERT(scope * 2 < spaces.size()); + + space_segment = spaces.substr(0, scope * 2); + space_segment_scope = scope; + return space_segment; + } + + std::string inner{}; + std::string_view space_segment; + + u32 scope{}; + u32 space_segment_scope{}; + + static constexpr std::string_view spaces{" "}; +}; + +std::string ASTManager::Print() const { + ASTPrinter printer{}; + printer.Visit(main_node); + return printer.GetResult(); +} + +ASTManager::ASTManager(bool full_decompile, bool disable_else_derivation) + : full_decompile{full_decompile}, disable_else_derivation{disable_else_derivation} {}; + +ASTManager::~ASTManager() { + Clear(); +} + +void ASTManager::Init() { + main_node = ASTBase::Make<ASTProgram>(ASTNode{}); + program = std::get_if<ASTProgram>(main_node->GetInnerData()); + false_condition = MakeExpr<ExprBoolean>(false); +} + +void ASTManager::DeclareLabel(u32 address) { + const auto pair = labels_map.emplace(address, labels_count); + if (pair.second) { + labels_count++; + labels.resize(labels_count); + } +} + +void ASTManager::InsertLabel(u32 address) { + const u32 index = labels_map[address]; + const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index); + labels[index] = label; + program->nodes.PushBack(label); +} + +void ASTManager::InsertGoto(Expr condition, u32 address) { + const u32 index = labels_map[address]; + const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index); + gotos.push_back(goto_node); + program->nodes.PushBack(goto_node); +} + +void ASTManager::InsertBlock(u32 start_address, u32 end_address) { + ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address); + program->nodes.PushBack(std::move(block)); +} + +void ASTManager::InsertReturn(Expr condition, bool kills) { + ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills); + program->nodes.PushBack(std::move(node)); +} + +// The decompile algorithm is based on +// "Taming control flow: A structured approach to eliminating goto statements" +// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be +// on the same structured level as the label which they jump to. This is done, +// through outward/inward movements and lifting. Once they are at the same +// level, you can enclose them in an "if" structure or a "do-while" structure. +void ASTManager::Decompile() { + auto it = gotos.begin(); + while (it != gotos.end()) { + const ASTNode goto_node = *it; + const auto label_index = goto_node->GetGotoLabel(); + if (!label_index) { + return; + } + const ASTNode label = labels[*label_index]; + if (!full_decompile) { + // We only decompile backward jumps + if (!IsBackwardsJump(goto_node, label)) { + it++; + continue; + } + } + if (IndirectlyRelated(goto_node, label)) { + while (!DirectlyRelated(goto_node, label)) { + MoveOutward(goto_node); + } + } + if (DirectlyRelated(goto_node, label)) { + u32 goto_level = goto_node->GetLevel(); + const u32 label_level = label->GetLevel(); + while (label_level < goto_level) { + MoveOutward(goto_node); + goto_level--; + } + // TODO(Blinkhawk): Implement Lifting and Inward Movements + } + if (label->GetParent() == goto_node->GetParent()) { + bool is_loop = false; + ASTNode current = goto_node->GetPrevious(); + while (current) { + if (current == label) { + is_loop = true; + break; + } + current = current->GetPrevious(); + } + + if (is_loop) { + EncloseDoWhile(goto_node, label); + } else { + EncloseIfThen(goto_node, label); + } + it = gotos.erase(it); + continue; + } + it++; + } + if (full_decompile) { + for (const ASTNode& label : labels) { + auto& manager = label->GetManager(); + manager.Remove(label); + } + labels.clear(); + } else { + auto label_it = labels.begin(); + while (label_it != labels.end()) { + bool can_remove = true; + ASTNode label = *label_it; + for (const ASTNode& goto_node : gotos) { + const auto label_index = goto_node->GetGotoLabel(); + if (!label_index) { + return; + } + ASTNode& glabel = labels[*label_index]; + if (glabel == label) { + can_remove = false; + break; + } + } + if (can_remove) { + label->MarkLabelUnused(); + } + } + } +} + +bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const { + u32 goto_level = goto_node->GetLevel(); + u32 label_level = label_node->GetLevel(); + while (goto_level > label_level) { + goto_level--; + goto_node = goto_node->GetParent(); + } + while (label_level > goto_level) { + label_level--; + label_node = label_node->GetParent(); + } + while (goto_node->GetParent() != label_node->GetParent()) { + goto_node = goto_node->GetParent(); + label_node = label_node->GetParent(); + } + ASTNode current = goto_node->GetPrevious(); + while (current) { + if (current == label_node) { + return true; + } + current = current->GetPrevious(); + } + return false; +} + +bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const { + return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second)); +} + +bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const { + if (first->GetParent() == second->GetParent()) { + return false; + } + const u32 first_level = first->GetLevel(); + const u32 second_level = second->GetLevel(); + u32 min_level; + u32 max_level; + ASTNode max; + ASTNode min; + if (first_level > second_level) { + min_level = second_level; + min = second; + max_level = first_level; + max = first; + } else { + min_level = first_level; + min = first; + max_level = second_level; + max = second; + } + + while (max_level > min_level) { + max_level--; + max = max->GetParent(); + } + + return min->GetParent() == max->GetParent(); +} + +void ASTManager::ShowCurrentState(std::string_view state) const { + LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); + SanityCheck(); +} + +void ASTManager::SanityCheck() const { + for (const auto& label : labels) { + if (!label->GetParent()) { + LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); + } + } +} + +void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) { + ASTZipper& zipper = goto_node->GetManager(); + const ASTNode loop_start = label->GetNext(); + if (loop_start == goto_node) { + zipper.Remove(goto_node); + return; + } + const ASTNode parent = label->GetParent(); + const Expr condition = goto_node->GetGotoCondition(); + zipper.DetachSegment(loop_start, goto_node); + const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition); + ASTZipper* sub_zipper = do_while_node->GetSubNodes(); + sub_zipper->Init(loop_start, do_while_node); + zipper.InsertAfter(do_while_node, label); + sub_zipper->Remove(goto_node); +} + +void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) { + ASTZipper& zipper = goto_node->GetManager(); + const ASTNode if_end = label->GetPrevious(); + if (if_end == goto_node) { + zipper.Remove(goto_node); + return; + } + const ASTNode prev = goto_node->GetPrevious(); + const Expr condition = goto_node->GetGotoCondition(); + bool do_else = false; + if (!disable_else_derivation && prev->IsIfThen()) { + const Expr if_condition = prev->GetIfCondition(); + do_else = ExprAreEqual(if_condition, condition); + } + const ASTNode parent = label->GetParent(); + zipper.DetachSegment(goto_node, if_end); + ASTNode if_node; + if (do_else) { + if_node = ASTBase::Make<ASTIfElse>(parent); + } else { + Expr neg_condition = MakeExprNot(condition); + if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition); + } + ASTZipper* sub_zipper = if_node->GetSubNodes(); + sub_zipper->Init(goto_node, if_node); + zipper.InsertAfter(if_node, prev); + sub_zipper->Remove(goto_node); +} + +void ASTManager::MoveOutward(ASTNode goto_node) { + ASTZipper& zipper = goto_node->GetManager(); + const ASTNode parent = goto_node->GetParent(); + ASTZipper& zipper2 = parent->GetManager(); + const ASTNode grandpa = parent->GetParent(); + const bool is_loop = parent->IsLoop(); + const bool is_else = parent->IsIfElse(); + const bool is_if = parent->IsIfThen(); + + const ASTNode prev = goto_node->GetPrevious(); + const ASTNode post = goto_node->GetNext(); + + const Expr condition = goto_node->GetGotoCondition(); + zipper.DetachSingle(goto_node); + if (is_loop) { + const u32 var_index = NewVariable(); + const Expr var_condition = MakeExpr<ExprVar>(var_index); + const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); + const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); + zipper2.InsertBefore(var_node_init, parent); + zipper.InsertAfter(var_node, prev); + goto_node->SetGotoCondition(var_condition); + const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition); + zipper.InsertAfter(break_node, var_node); + } else if (is_if || is_else) { + const u32 var_index = NewVariable(); + const Expr var_condition = MakeExpr<ExprVar>(var_index); + const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition); + const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition); + if (is_if) { + zipper2.InsertBefore(var_node_init, parent); + } else { + zipper2.InsertBefore(var_node_init, parent->GetPrevious()); + } + zipper.InsertAfter(var_node, prev); + goto_node->SetGotoCondition(var_condition); + if (post) { + zipper.DetachTail(post); + const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition)); + ASTZipper* sub_zipper = if_node->GetSubNodes(); + sub_zipper->Init(post, if_node); + zipper.InsertAfter(if_node, var_node); + } + } else { + UNREACHABLE(); + } + const ASTNode next = parent->GetNext(); + if (is_if && next && next->IsIfElse()) { + zipper2.InsertAfter(goto_node, next); + goto_node->SetParent(grandpa); + return; + } + zipper2.InsertAfter(goto_node, parent); + goto_node->SetParent(grandpa); +} + +class ASTClearer { +public: + ASTClearer() = default; + + void operator()(const ASTProgram& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(const ASTIfThen& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(const ASTIfElse& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {} + + void operator()(ASTBlockDecoded& ast) { + ast.nodes.clear(); + } + + void operator()([[maybe_unused]] const ASTVarSet& ast) {} + + void operator()([[maybe_unused]] const ASTLabel& ast) {} + + void operator()([[maybe_unused]] const ASTGoto& ast) {} + + void operator()(const ASTDoWhile& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()([[maybe_unused]] const ASTReturn& ast) {} + + void operator()([[maybe_unused]] const ASTBreak& ast) {} + + void Visit(const ASTNode& node) { + std::visit(*this, *node->GetInnerData()); + node->Clear(); + } +}; + +void ASTManager::Clear() { + if (!main_node) { + return; + } + ASTClearer clearer{}; + clearer.Visit(main_node); + main_node.reset(); + program = nullptr; + labels_map.clear(); + labels.clear(); + gotos.clear(); +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h new file mode 100644 index 000000000..a2f0044ba --- /dev/null +++ b/src/video_core/shader/ast.h @@ -0,0 +1,400 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <functional> +#include <list> +#include <memory> +#include <optional> +#include <string> +#include <unordered_map> +#include <vector> + +#include "video_core/shader/expr.h" +#include "video_core/shader/node.h" + +namespace VideoCommon::Shader { + +class ASTBase; +class ASTBlockDecoded; +class ASTBlockEncoded; +class ASTBreak; +class ASTDoWhile; +class ASTGoto; +class ASTIfElse; +class ASTIfThen; +class ASTLabel; +class ASTProgram; +class ASTReturn; +class ASTVarSet; + +using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded, + ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>; + +using ASTNode = std::shared_ptr<ASTBase>; + +enum class ASTZipperType : u32 { + Program, + IfThen, + IfElse, + Loop, +}; + +class ASTZipper final { +public: + explicit ASTZipper(); + + void Init(ASTNode first, ASTNode parent); + + ASTNode GetFirst() const { + return first; + } + + ASTNode GetLast() const { + return last; + } + + void PushBack(ASTNode new_node); + void PushFront(ASTNode new_node); + void InsertAfter(ASTNode new_node, ASTNode at_node); + void InsertBefore(ASTNode new_node, ASTNode at_node); + void DetachTail(ASTNode node); + void DetachSingle(ASTNode node); + void DetachSegment(ASTNode start, ASTNode end); + void Remove(ASTNode node); + + ASTNode first{}; + ASTNode last{}; +}; + +class ASTProgram { +public: + ASTZipper nodes{}; +}; + +class ASTIfThen { +public: + explicit ASTIfThen(Expr condition) : condition{std::move(condition)} {} + Expr condition; + ASTZipper nodes{}; +}; + +class ASTIfElse { +public: + ASTZipper nodes{}; +}; + +class ASTBlockEncoded { +public: + explicit ASTBlockEncoded(u32 start, u32 end) : start{start}, end{end} {} + u32 start; + u32 end; +}; + +class ASTBlockDecoded { +public: + explicit ASTBlockDecoded(NodeBlock&& new_nodes) : nodes(std::move(new_nodes)) {} + NodeBlock nodes; +}; + +class ASTVarSet { +public: + explicit ASTVarSet(u32 index, Expr condition) : index{index}, condition{std::move(condition)} {} + u32 index; + Expr condition; +}; + +class ASTLabel { +public: + explicit ASTLabel(u32 index) : index{index} {} + u32 index; + bool unused{}; +}; + +class ASTGoto { +public: + explicit ASTGoto(Expr condition, u32 label) : condition{std::move(condition)}, label{label} {} + Expr condition; + u32 label; +}; + +class ASTDoWhile { +public: + explicit ASTDoWhile(Expr condition) : condition{std::move(condition)} {} + Expr condition; + ASTZipper nodes{}; +}; + +class ASTReturn { +public: + explicit ASTReturn(Expr condition, bool kills) + : condition{std::move(condition)}, kills{kills} {} + Expr condition; + bool kills; +}; + +class ASTBreak { +public: + explicit ASTBreak(Expr condition) : condition{std::move(condition)} {} + Expr condition; +}; + +class ASTBase { +public: + explicit ASTBase(ASTNode parent, ASTData data) + : data{std::move(data)}, parent{std::move(parent)} {} + + template <class U, class... Args> + static ASTNode Make(ASTNode parent, Args&&... args) { + return std::make_shared<ASTBase>(std::move(parent), + ASTData(U(std::forward<Args>(args)...))); + } + + void SetParent(ASTNode new_parent) { + parent = std::move(new_parent); + } + + ASTNode& GetParent() { + return parent; + } + + const ASTNode& GetParent() const { + return parent; + } + + u32 GetLevel() const { + u32 level = 0; + auto next_parent = parent; + while (next_parent) { + next_parent = next_parent->GetParent(); + level++; + } + return level; + } + + ASTData* GetInnerData() { + return &data; + } + + const ASTData* GetInnerData() const { + return &data; + } + + ASTNode GetNext() const { + return next; + } + + ASTNode GetPrevious() const { + return previous; + } + + ASTZipper& GetManager() { + return *manager; + } + + const ASTZipper& GetManager() const { + return *manager; + } + + std::optional<u32> GetGotoLabel() const { + auto inner = std::get_if<ASTGoto>(&data); + if (inner) { + return {inner->label}; + } + return {}; + } + + Expr GetGotoCondition() const { + auto inner = std::get_if<ASTGoto>(&data); + if (inner) { + return inner->condition; + } + return nullptr; + } + + void MarkLabelUnused() { + auto inner = std::get_if<ASTLabel>(&data); + if (inner) { + inner->unused = true; + } + } + + bool IsLabelUnused() const { + auto inner = std::get_if<ASTLabel>(&data); + if (inner) { + return inner->unused; + } + return true; + } + + std::optional<u32> GetLabelIndex() const { + auto inner = std::get_if<ASTLabel>(&data); + if (inner) { + return {inner->index}; + } + return {}; + } + + Expr GetIfCondition() const { + auto inner = std::get_if<ASTIfThen>(&data); + if (inner) { + return inner->condition; + } + return nullptr; + } + + void SetGotoCondition(Expr new_condition) { + auto inner = std::get_if<ASTGoto>(&data); + if (inner) { + inner->condition = std::move(new_condition); + } + } + + bool IsIfThen() const { + return std::holds_alternative<ASTIfThen>(data); + } + + bool IsIfElse() const { + return std::holds_alternative<ASTIfElse>(data); + } + + bool IsBlockEncoded() const { + return std::holds_alternative<ASTBlockEncoded>(data); + } + + void TransformBlockEncoded(NodeBlock&& nodes) { + data = ASTBlockDecoded(std::move(nodes)); + } + + bool IsLoop() const { + return std::holds_alternative<ASTDoWhile>(data); + } + + ASTZipper* GetSubNodes() { + if (std::holds_alternative<ASTProgram>(data)) { + return &std::get_if<ASTProgram>(&data)->nodes; + } + if (std::holds_alternative<ASTIfThen>(data)) { + return &std::get_if<ASTIfThen>(&data)->nodes; + } + if (std::holds_alternative<ASTIfElse>(data)) { + return &std::get_if<ASTIfElse>(&data)->nodes; + } + if (std::holds_alternative<ASTDoWhile>(data)) { + return &std::get_if<ASTDoWhile>(&data)->nodes; + } + return nullptr; + } + + void Clear() { + next.reset(); + previous.reset(); + parent.reset(); + manager = nullptr; + } + +private: + friend class ASTZipper; + + ASTData data; + ASTNode parent{}; + ASTNode next{}; + ASTNode previous{}; + ASTZipper* manager{}; +}; + +class ASTManager final { +public: + ASTManager(bool full_decompile, bool disable_else_derivation); + ~ASTManager(); + + ASTManager(const ASTManager& o) = delete; + ASTManager& operator=(const ASTManager& other) = delete; + + ASTManager(ASTManager&& other) noexcept = default; + ASTManager& operator=(ASTManager&& other) noexcept = default; + + void Init(); + + void DeclareLabel(u32 address); + + void InsertLabel(u32 address); + + void InsertGoto(Expr condition, u32 address); + + void InsertBlock(u32 start_address, u32 end_address); + + void InsertReturn(Expr condition, bool kills); + + std::string Print() const; + + void Decompile(); + + void ShowCurrentState(std::string_view state) const; + + void SanityCheck() const; + + void Clear(); + + bool IsFullyDecompiled() const { + if (full_decompile) { + return gotos.empty(); + } + + for (ASTNode goto_node : gotos) { + auto label_index = goto_node->GetGotoLabel(); + if (!label_index) { + return false; + } + ASTNode glabel = labels[*label_index]; + if (IsBackwardsJump(goto_node, glabel)) { + return false; + } + } + return true; + } + + ASTNode GetProgram() const { + return main_node; + } + + u32 GetVariables() const { + return variables; + } + + const std::vector<ASTNode>& GetLabels() const { + return labels; + } + +private: + bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const; + + bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const; + + bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const; + + void EncloseDoWhile(ASTNode goto_node, ASTNode label); + + void EncloseIfThen(ASTNode goto_node, ASTNode label); + + void MoveOutward(ASTNode goto_node); + + u32 NewVariable() { + return variables++; + } + + bool full_decompile{}; + bool disable_else_derivation{}; + std::unordered_map<u32, u32> labels_map{}; + u32 labels_count{}; + std::vector<ASTNode> labels{}; + std::list<ASTNode> gotos{}; + u32 variables{}; + ASTProgram* program{}; + ASTNode main_node{}; + Expr false_condition{}; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp new file mode 100644 index 000000000..cddcbd4f0 --- /dev/null +++ b/src/video_core/shader/compiler_settings.cpp @@ -0,0 +1,26 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/shader/compiler_settings.h" + +namespace VideoCommon::Shader { + +std::string CompileDepthAsString(const CompileDepth cd) { + switch (cd) { + case CompileDepth::BruteForce: + return "Brute Force Compile"; + case CompileDepth::FlowStack: + return "Simple Flow Stack Mode"; + case CompileDepth::NoFlowStack: + return "Remove Flow Stack"; + case CompileDepth::DecompileBackwards: + return "Decompile Backward Jumps"; + case CompileDepth::FullDecompile: + return "Full Decompilation"; + default: + return "Unknown Compiler Process"; + } +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h new file mode 100644 index 000000000..916018c01 --- /dev/null +++ b/src/video_core/shader/compiler_settings.h @@ -0,0 +1,26 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/engines/shader_bytecode.h" + +namespace VideoCommon::Shader { + +enum class CompileDepth : u32 { + BruteForce = 0, + FlowStack = 1, + NoFlowStack = 2, + DecompileBackwards = 3, + FullDecompile = 4, +}; + +std::string CompileDepthAsString(CompileDepth cd); + +struct CompilerSettings { + CompileDepth depth{CompileDepth::NoFlowStack}; + bool disable_else_derivation{true}; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp new file mode 100644 index 000000000..fe467608e --- /dev/null +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -0,0 +1,110 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <memory> +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/shader/const_buffer_locker.h" + +namespace VideoCommon::Shader { + +using Tegra::Engines::SamplerDescriptor; + +ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) + : stage{shader_stage} {} + +ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface& engine) + : stage{shader_stage}, engine{&engine} {} + +ConstBufferLocker::~ConstBufferLocker() = default; + +std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { + const std::pair<u32, u32> key = {buffer, offset}; + const auto iter = keys.find(key); + if (iter != keys.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); + keys.emplace(key, value); + return value; +} + +std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) { + const u32 key = offset; + const auto iter = bound_samplers.find(key); + if (iter != bound_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); + bound_samplers.emplace(key, value); + return value; +} + +std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler( + u32 buffer, u32 offset) { + const std::pair key = {buffer, offset}; + const auto iter = bindless_samplers.find(key); + if (iter != bindless_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); + bindless_samplers.emplace(key, value); + return value; +} + +void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { + keys.insert_or_assign({buffer, offset}, value); +} + +void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { + bound_samplers.insert_or_assign(offset, sampler); +} + +void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { + bindless_samplers.insert_or_assign({buffer, offset}, sampler); +} + +bool ConstBufferLocker::IsConsistent() const { + if (!engine) { + return false; + } + return std::all_of(keys.begin(), keys.end(), + [this](const auto& pair) { + const auto [cbuf, offset] = pair.first; + const auto value = pair.second; + return value == engine->AccessConstBuffer32(stage, cbuf, offset); + }) && + std::all_of(bound_samplers.begin(), bound_samplers.end(), + [this](const auto& sampler) { + const auto [key, value] = sampler; + return value == engine->AccessBoundSampler(stage, key); + }) && + std::all_of(bindless_samplers.begin(), bindless_samplers.end(), + [this](const auto& sampler) { + const auto [cbuf, offset] = sampler.first; + const auto value = sampler.second; + return value == engine->AccessBindlessSampler(stage, cbuf, offset); + }); +} + +bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { + return keys == rhs.keys && bound_samplers == rhs.bound_samplers && + bindless_samplers == rhs.bindless_samplers; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h new file mode 100644 index 000000000..600e2f3c3 --- /dev/null +++ b/src/video_core/shader/const_buffer_locker.h @@ -0,0 +1,80 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <unordered_map> +#include "common/common_types.h" +#include "common/hash.h" +#include "video_core/engines/const_buffer_engine_interface.h" + +namespace VideoCommon::Shader { + +using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; +using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; +using BindlessSamplerMap = + std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; + +/** + * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader + * compiler. with it, the shader can obtain required data from GPU state and store it for disk + * shader compilation. + **/ +class ConstBufferLocker { +public: + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); + + explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface& engine); + + ~ConstBufferLocker(); + + /// Retrieves a key from the locker, if it's registered, it will give the registered value, if + /// not it will obtain it from maxwell3d and register it. + std::optional<u32> ObtainKey(u32 buffer, u32 offset); + + std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); + + std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); + + /// Inserts a key. + void InsertKey(u32 buffer, u32 offset, u32 value); + + /// Inserts a bound sampler key. + void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Inserts a bindless sampler key. + void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Checks keys and samplers against engine's current const buffers. Returns true if they are + /// the same value, false otherwise; + bool IsConsistent() const; + + /// Returns true if the keys are equal to the other ones in the locker. + bool HasEqualKeys(const ConstBufferLocker& rhs) const; + + /// Gives an getter to the const buffer keys in the database. + const KeyMap& GetKeys() const { + return keys; + } + + /// Gets samplers database. + const BoundSamplerMap& GetBoundSamplers() const { + return bound_samplers; + } + + /// Gets bindless samplers database. + const BindlessSamplerMap& GetBindlessSamplers() const { + return bindless_samplers; + } + +private: + const Tegra::Engines::ShaderType stage; + Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; + KeyMap keys; + BoundSamplerMap bound_samplers; + BindlessSamplerMap bindless_samplers; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index ec3a76690..b427ac873 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -4,18 +4,21 @@ #include <list> #include <map> +#include <set> #include <stack> #include <unordered_map> -#include <unordered_set> #include <vector> #include "common/assert.h" #include "common/common_types.h" +#include "video_core/shader/ast.h" #include "video_core/shader/control_flow.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { + namespace { + using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -34,14 +37,20 @@ struct BlockStack { std::stack<u32> pbk_stack{}; }; -struct BlockBranchInfo { - Condition condition{}; - s32 address{exit_branch}; - bool kill{}; - bool is_sync{}; - bool is_brk{}; - bool ignore{}; -}; +template <typename T, typename... Args> +BlockBranchInfo MakeBranchInfo(Args&&... args) { + static_assert(std::is_convertible_v<T, BranchData>); + return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); +} + +bool BlockBranchIsIgnored(BlockBranchInfo first) { + bool ignore = false; + if (std::holds_alternative<SingleBranch>(*first)) { + const auto branch = std::get_if<SingleBranch>(first.get()); + ignore = branch->ignore; + } + return ignore; +} struct BlockInfo { u32 start{}; @@ -55,21 +64,21 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, - const u32 start) - : start{start}, program_code{program_code}, program_size{program_size} {} + explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) + : program_code{program_code}, start{start}, locker{locker} {} - u32 start{}; - std::vector<BlockInfo> block_info{}; - std::list<u32> inspect_queries{}; - std::list<Query> queries{}; - std::unordered_map<u32, u32> registered{}; - std::unordered_set<u32> labels{}; - std::map<u32, u32> ssy_labels{}; - std::map<u32, u32> pbk_labels{}; - std::unordered_map<u32, BlockStack> stacks{}; const ProgramCode& program_code; - const std::size_t program_size; + ConstBufferLocker& locker; + u32 start{}; + std::vector<BlockInfo> block_info; + std::list<u32> inspect_queries; + std::list<Query> queries; + std::unordered_map<u32, u32> registered; + std::set<u32> labels; + std::map<u32, u32> ssy_labels; + std::map<u32, u32> pbk_labels; + std::unordered_map<u32, BlockStack> stacks; + ASTManager* manager{}; }; enum class BlockCollision : u32 { None, Found, Inside }; @@ -102,7 +111,7 @@ BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { } Pred GetPredicate(u32 index, bool negated) { - return static_cast<Pred>(index + (negated ? 8 : 0)); + return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL)); } /** @@ -122,10 +131,122 @@ enum class ParseResult : u32 { AbnormalFlow, }; +struct BranchIndirectInfo { + u32 buffer{}; + u32 offset{}; + u32 entries{}; + s32 relative_position{}; +}; + +struct BufferInfo { + u32 index; + u32 offset; +}; + +std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) { + const Instruction instr = state.program_code[pos]; + const auto opcode = OpCode::Decode(instr); + if (opcode->get().GetId() != OpCode::Id::BRX) { + return std::nullopt; + } + if (instr.brx.constant_buffer != 0) { + return std::nullopt; + } + --pos; + return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); +} + +template <typename Result, typename TestCallable, typename PackCallable> +// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&> +// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&> +std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, + PackCallable pack) { + for (; pos >= state.start; --pos) { + if (IsSchedInstruction(pos, state.start)) { + continue; + } + const Instruction instr = state.program_code[pos]; + const auto opcode = OpCode::Decode(instr); + if (!opcode) { + continue; + } + if (test(instr, opcode->get())) { + --pos; + return std::make_optional(pack(instr, opcode->get())); + } + } + return std::nullopt; +} + +std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos, + u64 brx_tracked_register) { + return TrackInstruction<std::pair<BufferInfo, u64>>( + state, pos, + [brx_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::LD_C && + instr.gpr0.Value() == brx_tracked_register && + instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; + }, + [](auto instr, const auto& opcode) { + const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()), + static_cast<u32>(instr.cbuf36.GetOffset())}; + return std::make_pair(info, instr.gpr8.Value()); + }); +} + +std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, + u64 ldc_tracked_register) { + return TrackInstruction<u64>(state, pos, + [ldc_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::SHL_IMM && + instr.gpr0.Value() == ldc_tracked_register; + }, + [](auto instr, const auto&) { return instr.gpr8.Value(); }); +} + +std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, + u64 shl_tracked_register) { + return TrackInstruction<u32>(state, pos, + [shl_tracked_register](auto instr, const auto& opcode) { + return opcode.GetId() == OpCode::Id::IMNMX_IMM && + instr.gpr0.Value() == shl_tracked_register; + }, + [](auto instr, const auto&) { + return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); + }); +} + +std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { + const auto brx_info = GetBRXInfo(state, pos); + if (!brx_info) { + return std::nullopt; + } + const auto [relative_position, brx_tracked_register] = *brx_info; + + const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); + if (!ldc_info) { + return std::nullopt; + } + const auto [buffer_info, ldc_tracked_register] = *ldc_info; + + const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); + if (!shl_tracked_register) { + return std::nullopt; + } + + const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); + if (!entries) { + return std::nullopt; + } + + return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; +} + std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { u32 offset = static_cast<u32>(address); - const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); + const u32 end_address = static_cast<u32>(state.program_code.size()); ParseInfo parse_info{}; + SingleBranch single_branch{}; const auto insert_label = [](CFGRebuildState& state, u32 address) { const auto pair = state.labels.emplace(address); @@ -138,13 +259,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) if (offset >= end_address) { // ASSERT_OR_EXECUTE can't be used, as it ignores the break ASSERT_MSG(false, "Shader passed the current limit!"); - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.ignore = false; + + single_branch.address = exit_branch; + single_branch.ignore = false; break; } if (state.registered.count(offset) != 0) { - parse_info.branch_info.address = offset; - parse_info.branch_info.ignore = true; + single_branch.address = offset; + single_branch.ignore = true; break; } if (IsSchedInstruction(offset, state.start)) { @@ -161,24 +283,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) switch (opcode->get().GetId()) { case OpCode::Id::EXIT: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = exit_branch; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } @@ -187,99 +311,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) return {ParseResult::AbnormalFlow, parse_info}; } const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } const u32 branch_offset = offset + instr.bra.GetBranchTarget(); if (branch_offset == 0) { - parse_info.branch_info.address = exit_branch; + single_branch.address = exit_branch; } else { - parse_info.branch_info.address = branch_offset; + single_branch.address = branch_offset; } insert_label(state, branch_offset); - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::SYNC: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = unassigned_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = true; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = unassigned_branch; + single_branch.kill = false; + single_branch.is_sync = true; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::BRK: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = unassigned_branch; - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = true; - parse_info.branch_info.ignore = false; + single_branch.address = unassigned_branch; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = true; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } case OpCode::Id::KIL: { const auto pred_index = static_cast<u32>(instr.pred.pred_index); - parse_info.branch_info.condition.predicate = - GetPredicate(pred_index, instr.negate_pred != 0); - if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); + if (single_branch.condition.predicate == Pred::NeverExecute) { offset++; continue; } const ConditionCode cc = instr.flow_condition_code; - parse_info.branch_info.condition.cc = cc; + single_branch.condition.cc = cc; if (cc == ConditionCode::F) { offset++; continue; } - parse_info.branch_info.address = exit_branch; - parse_info.branch_info.kill = true; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; - parse_info.branch_info.ignore = false; + single_branch.address = exit_branch; + single_branch.kill = true; + single_branch.is_sync = false; + single_branch.is_brk = false; + single_branch.ignore = false; parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, + single_branch.is_sync, single_branch.is_brk, single_branch.ignore); return {ParseResult::ControlCaught, parse_info}; } @@ -296,7 +428,30 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) break; } case OpCode::Id::BRX: { - return {ParseResult::AbnormalFlow, parse_info}; + const auto tmp = TrackBranchIndirectInfo(state, offset); + if (!tmp) { + LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); + return {ParseResult::AbnormalFlow, parse_info}; + } + + const auto result = *tmp; + const s32 pc_target = offset + result.relative_position; + std::vector<CaseBranch> branches; + for (u32 i = 0; i < result.entries; i++) { + auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); + if (!key) { + return {ParseResult::AbnormalFlow, parse_info}; + } + u32 value = *key; + u32 target = static_cast<u32>((value >> 3) + pc_target); + insert_label(state, target); + branches.emplace_back(value, target); + } + parse_info.end_address = offset; + parse_info.branch_info = MakeBranchInfo<MultiBranch>( + static_cast<u32>(instr.gpr8.Value()), std::move(branches)); + + return {ParseResult::ControlCaught, parse_info}; } default: break; @@ -304,10 +459,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) offset++; } - parse_info.branch_info.kill = false; - parse_info.branch_info.is_sync = false; - parse_info.branch_info.is_brk = false; + single_branch.kill = false; + single_branch.is_sync = false; + single_branch.is_brk = false; parse_info.end_address = offset - 1; + parse_info.branch_info = MakeBranchInfo<SingleBranch>( + single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, + single_branch.is_brk, single_branch.ignore); return {ParseResult::BlockEnd, parse_info}; } @@ -331,9 +489,10 @@ bool TryInspectAddress(CFGRebuildState& state) { BlockInfo& current_block = state.block_info[block_index]; current_block.end = address - 1; new_block.branch = current_block.branch; - BlockBranchInfo forward_branch{}; - forward_branch.address = address; - forward_branch.ignore = true; + BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); + const auto branch = std::get_if<SingleBranch>(forward_branch.get()); + branch->address = address; + branch->ignore = true; current_block.branch = forward_branch; return true; } @@ -348,12 +507,15 @@ bool TryInspectAddress(CFGRebuildState& state) { BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); block_info.branch = parse_info.branch_info; - if (parse_info.branch_info.condition.IsUnconditional()) { + if (std::holds_alternative<SingleBranch>(*block_info.branch)) { + const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); + if (branch->condition.IsUnconditional()) { + return true; + } + const u32 fallthrough_address = parse_info.end_address + 1; + state.inspect_queries.push_front(fallthrough_address); return true; } - - const u32 fallthrough_address = parse_info.end_address + 1; - state.inspect_queries.push_front(fallthrough_address); return true; } @@ -391,91 +553,205 @@ bool TryQuery(CFGRebuildState& state) { state.queries.pop_front(); gather_labels(q2.ssy_stack, state.ssy_labels, block); gather_labels(q2.pbk_stack, state.pbk_labels, block); - if (!block.branch.condition.IsUnconditional()) { - q2.address = block.end + 1; - state.queries.push_back(q2); - } + if (std::holds_alternative<SingleBranch>(*block.branch)) { + const auto branch = std::get_if<SingleBranch>(block.branch.get()); + if (!branch->condition.IsUnconditional()) { + q2.address = block.end + 1; + state.queries.push_back(q2); + } - Query conditional_query{q2}; - if (block.branch.is_sync) { - if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.ssy_stack.top(); + Query conditional_query{q2}; + if (branch->is_sync) { + if (branch->address == unassigned_branch) { + branch->address = conditional_query.ssy_stack.top(); + } + conditional_query.ssy_stack.pop(); } - conditional_query.ssy_stack.pop(); - } - if (block.branch.is_brk) { - if (block.branch.address == unassigned_branch) { - block.branch.address = conditional_query.pbk_stack.top(); + if (branch->is_brk) { + if (branch->address == unassigned_branch) { + branch->address = conditional_query.pbk_stack.top(); + } + conditional_query.pbk_stack.pop(); } - conditional_query.pbk_stack.pop(); + conditional_query.address = branch->address; + state.queries.push_back(std::move(conditional_query)); + return true; + } + const auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); + for (const auto& branch_case : multi_branch->branches) { + Query conditional_query{q2}; + conditional_query.address = branch_case.address; + state.queries.push_back(std::move(conditional_query)); } - conditional_query.address = block.branch.address; - state.queries.push_back(std::move(conditional_query)); return true; } + } // Anonymous namespace -std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, - std::size_t program_size, u32 start_address) { - CFGRebuildState state{program_code, program_size, start_address}; +void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { + const auto get_expr = ([&](const Condition& cond) -> Expr { + Expr result{}; + if (cond.cc != ConditionCode::T) { + result = MakeExpr<ExprCondCode>(cond.cc); + } + if (cond.predicate != Pred::UnusedIndex) { + u32 pred = static_cast<u32>(cond.predicate); + bool negate = false; + if (pred > 7) { + negate = true; + pred -= 8; + } + Expr extra = MakeExpr<ExprPredicate>(pred); + if (negate) { + extra = MakeExpr<ExprNot>(extra); + } + if (result) { + return MakeExpr<ExprAnd>(extra, result); + } + return extra; + } + if (result) { + return result; + } + return MakeExpr<ExprBoolean>(true); + }); + if (std::holds_alternative<SingleBranch>(*branch_info)) { + const auto branch = std::get_if<SingleBranch>(branch_info.get()); + if (branch->address < 0) { + if (branch->kill) { + mm.InsertReturn(get_expr(branch->condition), true); + return; + } + mm.InsertReturn(get_expr(branch->condition), false); + return; + } + mm.InsertGoto(get_expr(branch->condition), branch->address); + return; + } + const auto multi_branch = std::get_if<MultiBranch>(branch_info.get()); + for (const auto& branch_case : multi_branch->branches) { + mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), + branch_case.address); + } +} + +void DecompileShader(CFGRebuildState& state) { + state.manager->Init(); + for (auto label : state.labels) { + state.manager->DeclareLabel(label); + } + for (auto& block : state.block_info) { + if (state.labels.count(block.start) != 0) { + state.manager->InsertLabel(block.start); + } + const bool ignore = BlockBranchIsIgnored(block.branch); + u32 end = ignore ? block.end + 1 : block.end; + state.manager->InsertBlock(block.start, end); + if (!ignore) { + InsertBranch(*state.manager, block.branch); + } + } + state.manager->Decompile(); +} + +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, + const CompilerSettings& settings, + ConstBufferLocker& locker) { + auto result_out = std::make_unique<ShaderCharacteristics>(); + if (settings.depth == CompileDepth::BruteForce) { + result_out->settings.depth = CompileDepth::BruteForce; + return result_out; + } + CFGRebuildState state{program_code, start_address, locker}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); state.inspect_queries.push_back(state.start); while (!state.inspect_queries.empty()) { if (!TryInspectAddress(state)) { - return {}; + result_out->settings.depth = CompileDepth::BruteForce; + return result_out; } } - // Decompile Stacks - state.queries.push_back(Query{state.start, {}, {}}); - bool decompiled = true; - while (!state.queries.empty()) { - if (!TryQuery(state)) { - decompiled = false; - break; + bool use_flow_stack = true; + + bool decompiled = false; + + if (settings.depth != CompileDepth::FlowStack) { + // Decompile Stacks + state.queries.push_back(Query{state.start, {}, {}}); + decompiled = true; + while (!state.queries.empty()) { + if (!TryQuery(state)) { + decompiled = false; + break; + } } } + use_flow_stack = !decompiled; + // Sort and organize results std::sort(state.block_info.begin(), state.block_info.end(), - [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; }); - ShaderCharacteristics result_out{}; - result_out.decompilable = decompiled; - result_out.start = start_address; - result_out.end = start_address; - for (const auto& block : state.block_info) { + [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); + if (decompiled && settings.depth != CompileDepth::NoFlowStack) { + ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, + settings.disable_else_derivation}; + state.manager = &manager; + DecompileShader(state); + decompiled = state.manager->IsFullyDecompiled(); + if (!decompiled) { + if (settings.depth == CompileDepth::FullDecompile) { + LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); + } else { + LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); + } + state.manager->ShowCurrentState("Of Shader"); + state.manager->Clear(); + } else { + auto characteristics = std::make_unique<ShaderCharacteristics>(); + characteristics->start = start_address; + characteristics->settings.depth = settings.depth; + characteristics->manager = std::move(manager); + characteristics->end = state.block_info.back().end + 1; + return characteristics; + } + } + + result_out->start = start_address; + result_out->settings.depth = + use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; + result_out->blocks.clear(); + for (auto& block : state.block_info) { ShaderBlock new_block{}; new_block.start = block.start; new_block.end = block.end; - new_block.ignore_branch = block.branch.ignore; + new_block.ignore_branch = BlockBranchIsIgnored(block.branch); if (!new_block.ignore_branch) { - new_block.branch.cond = block.branch.condition; - new_block.branch.kills = block.branch.kill; - new_block.branch.address = block.branch.address; + new_block.branch = block.branch; } - result_out.end = std::max(result_out.end, block.end); - result_out.blocks.push_back(new_block); + result_out->end = std::max(result_out->end, block.end); + result_out->blocks.push_back(new_block); } - if (result_out.decompilable) { - result_out.labels = std::move(state.labels); - return {std::move(result_out)}; + if (!use_flow_stack) { + result_out->labels = std::move(state.labels); + return result_out; } - // If it's not decompilable, merge the unlabelled blocks together - auto back = result_out.blocks.begin(); + auto back = result_out->blocks.begin(); auto next = std::next(back); - while (next != result_out.blocks.end()) { + while (next != result_out->blocks.end()) { if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { back->end = next->end; - next = result_out.blocks.erase(next); + next = result_out->blocks.erase(next); continue; } back = next; ++next; } - return {std::move(result_out)}; + + return result_out; } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index b0a5e4f8c..5304998b9 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -6,9 +6,12 @@ #include <list> #include <optional> -#include <unordered_set> +#include <set> +#include <variant> #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/ast.h" +#include "video_core/shader/compiler_settings.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -35,29 +38,61 @@ struct Condition { } }; -struct ShaderBlock { - struct Branch { - Condition cond{}; - bool kills{}; - s32 address{}; +class SingleBranch { +public: + SingleBranch() = default; + SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk, + bool ignore) + : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk}, + ignore{ignore} {} + + bool operator==(const SingleBranch& b) const { + return std::tie(condition, address, kill, is_sync, is_brk, ignore) == + std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); + } + + bool operator!=(const SingleBranch& b) const { + return !operator==(b); + } + + Condition condition{}; + s32 address{exit_branch}; + bool kill{}; + bool is_sync{}; + bool is_brk{}; + bool ignore{}; +}; - bool operator==(const Branch& b) const { - return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); - } +struct CaseBranch { + CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {} + u32 cmp_value; + u32 address; +}; + +class MultiBranch { +public: + MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches) + : gpr{gpr}, branches{std::move(branches)} {} + + u32 gpr{}; + std::vector<CaseBranch> branches{}; +}; + +using BranchData = std::variant<SingleBranch, MultiBranch>; +using BlockBranchInfo = std::shared_ptr<BranchData>; - bool operator!=(const Branch& b) const { - return !operator==(b); - } - }; +bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); +struct ShaderBlock { u32 start{}; u32 end{}; bool ignore_branch{}; - Branch branch{}; + BlockBranchInfo branch{}; bool operator==(const ShaderBlock& sb) const { - return std::tie(start, end, ignore_branch, branch) == - std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); + return std::tie(start, end, ignore_branch) == + std::tie(sb.start, sb.end, sb.ignore_branch) && + BlockBranchInfoAreEqual(branch, sb.branch); } bool operator!=(const ShaderBlock& sb) const { @@ -67,13 +102,15 @@ struct ShaderBlock { struct ShaderCharacteristics { std::list<ShaderBlock> blocks{}; - bool decompilable{}; + std::set<u32> labels{}; u32 start{}; u32 end{}; - std::unordered_set<u32> labels{}; + ASTManager manager{true, true}; + CompilerSettings settings{}; }; -std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, - std::size_t program_size, u32 start_address); +std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, + const CompilerSettings& settings, + ConstBufferLocker& locker); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 47a9fd961..22c3e5120 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -33,60 +33,140 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } -} // namespace +} // Anonymous namespace + +class ASTDecoder { +public: + ASTDecoder(ShaderIR& ir) : ir(ir) {} + + void operator()(ASTProgram& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(ASTIfThen& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(ASTIfElse& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(ASTBlockEncoded& ast) {} + + void operator()(ASTBlockDecoded& ast) {} + + void operator()(ASTVarSet& ast) {} + + void operator()(ASTLabel& ast) {} + + void operator()(ASTGoto& ast) {} + + void operator()(ASTDoWhile& ast) { + ASTNode current = ast.nodes.GetFirst(); + while (current) { + Visit(current); + current = current->GetNext(); + } + } + + void operator()(ASTReturn& ast) {} + + void operator()(ASTBreak& ast) {} + + void Visit(ASTNode& node) { + std::visit(*this, *node->GetInnerData()); + if (node->IsBlockEncoded()) { + auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData()); + NodeBlock bb = ir.DecodeRange(block->start, block->end); + node->TransformBlockEncoded(std::move(bb)); + } + } + +private: + ShaderIR& ir; +}; void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - disable_flow_stack = false; - const auto info = ScanFlow(program_code, program_size, main_offset); - if (info) { - const auto& shader_info = *info; - coverage_begin = shader_info.start; - coverage_end = shader_info.end; - if (shader_info.decompilable) { - disable_flow_stack = true; - const auto insert_block = [this](NodeBlock& nodes, u32 label) { - if (label == static_cast<u32>(exit_branch)) { - return; - } - basic_blocks.insert({label, nodes}); - }; - const auto& blocks = shader_info.blocks; - NodeBlock current_block; - u32 current_label = static_cast<u32>(exit_branch); - for (auto& block : blocks) { - if (shader_info.labels.count(block.start) != 0) { - insert_block(current_block, current_label); - current_block.clear(); - current_label = block.start; - } - if (!block.ignore_branch) { - DecodeRangeInner(current_block, block.start, block.end); - InsertControlFlow(current_block, block); - } else { - DecodeRangeInner(current_block, block.start, block.end + 1); - } - } - insert_block(current_block, current_label); - return; - } - LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method"); - // we can't decompile it, fallback to standard method + decompiled = false; + auto info = ScanFlow(program_code, main_offset, settings, locker); + auto& shader_info = *info; + coverage_begin = shader_info.start; + coverage_end = shader_info.end; + switch (shader_info.settings.depth) { + case CompileDepth::FlowStack: { for (const auto& block : shader_info.blocks) { basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); } - return; + break; } - LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); - - // Now we need to deal with an undecompilable shader. We need to brute force - // a shader that captures every position. - coverage_begin = main_offset; - const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); - coverage_end = shader_end; - for (u32 label = main_offset; label < shader_end; label++) { - basic_blocks.insert({label, DecodeRange(label, label + 1)}); + case CompileDepth::NoFlowStack: { + disable_flow_stack = true; + const auto insert_block = [this](NodeBlock& nodes, u32 label) { + if (label == static_cast<u32>(exit_branch)) { + return; + } + basic_blocks.insert({label, nodes}); + }; + const auto& blocks = shader_info.blocks; + NodeBlock current_block; + u32 current_label = static_cast<u32>(exit_branch); + for (auto& block : blocks) { + if (shader_info.labels.count(block.start) != 0) { + insert_block(current_block, current_label); + current_block.clear(); + current_label = block.start; + } + if (!block.ignore_branch) { + DecodeRangeInner(current_block, block.start, block.end); + InsertControlFlow(current_block, block); + } else { + DecodeRangeInner(current_block, block.start, block.end + 1); + } + } + insert_block(current_block, current_label); + break; + } + case CompileDepth::DecompileBackwards: + case CompileDepth::FullDecompile: { + program_manager = std::move(shader_info.manager); + disable_flow_stack = true; + decompiled = true; + ASTDecoder decoder{*this}; + ASTNode program = GetASTProgram(); + decoder.Visit(program); + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); + [[fallthrough]]; + case CompileDepth::BruteForce: { + const auto shader_end = static_cast<u32>(program_code.size()); + coverage_begin = main_offset; + coverage_end = shader_end; + for (u32 label = main_offset; label < shader_end; ++label) { + basic_blocks.insert({label, DecodeRange(label, label + 1)}); + } + break; + } + } + if (settings.depth != shader_info.settings.depth) { + LOG_WARNING( + HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", + CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); } } @@ -118,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { } return result; }; - if (block.branch.address < 0) { - if (block.branch.kills) { - Node n = Operation(OperationCode::Discard); - n = apply_conditions(block.branch.cond, n); + if (std::holds_alternative<SingleBranch>(*block.branch)) { + auto branch = std::get_if<SingleBranch>(block.branch.get()); + if (branch->address < 0) { + if (branch->kill) { + Node n = Operation(OperationCode::Discard); + n = apply_conditions(branch->condition, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Exit); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Exit); - n = apply_conditions(block.branch.cond, n); + Node n = Operation(OperationCode::Branch, Immediate(branch->address)); + n = apply_conditions(branch->condition, n); bb.push_back(n); global_code.push_back(n); return; } - Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); - n = apply_conditions(block.branch.cond, n); - bb.push_back(n); - global_code.push_back(n); + auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); + Node op_a = GetRegister(multi_branch->gpr); + for (auto& branch_case : multi_branch->branches) { + Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); + Node op_b = Immediate(branch_case.cmp_value); + Node condition = + GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); + auto result = Conditional(condition, {n}); + bb.push_back(result); + global_code.push_back(result); + } } u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 1473c282a..fcedd2af6 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -43,12 +43,12 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::FMUL_IMM: { // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. if (instr.fmul.tab5cb8_2 != 0) { - LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", - instr.fmul.tab5cb8_2.Value()); + LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", + instr.fmul.tab5cb8_2.Value()); } if (instr.fmul.tab5c68_0 != 1) { - LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", - instr.fmul.tab5c68_0.Value()); + LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", + instr.fmul.tab5c68_0.Value()); } op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); @@ -144,10 +144,11 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { case OpCode::Id::RRO_C: case OpCode::Id::RRO_R: case OpCode::Id::RRO_IMM: { + LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); + // Currently RRO is only implemented as a register move. op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); SetRegister(bb, instr.gpr0, op_b); - LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); break; } default: diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index b06cbe441..ee7d9a29d 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -21,8 +21,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { if (opcode->get().GetId() == OpCode::Id::HADD2_C || opcode->get().GetId() == OpCode::Id::HADD2_R) { - if (instr.alu_half.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 6466fc011..d179b9873 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -19,12 +19,12 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { - if (instr.alu_half_imm.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half_imm.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } else { - if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } } diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index b73f6536e..a33d242e9 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -144,7 +144,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::ICMP_IMM: { const Node zero = Immediate(0); - const auto [op_b, test] = [&]() -> std::pair<Node, Node> { + const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::ICMP_CR: return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), @@ -161,10 +161,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { return {zero, zero}; } }(); - const Node op_a = GetRegister(instr.gpr8); + const Node op_lhs = GetRegister(instr.gpr8); const Node comparison = GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); - SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b)); + SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); break; } case OpCode::Id::LOP_C: diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index ca2f39e8d..5973588d6 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -19,10 +19,10 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); if (instr.ffma.tab5980_0 != 1) { - LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); + LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); } if (instr.ffma.tab5980_1 != 0) { - LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); + LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); } const Node op_a = GetRegister(instr.gpr8); diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 48ca7a4af..848e46874 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -20,8 +20,8 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - if (instr.hset2.ftz != 0) { - LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); + if (instr.hset2.ftz == 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index 840694527..310655619 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" @@ -18,7 +19,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - DEBUG_ASSERT(instr.hsetp2.ftz == 0); + if (instr.hsetp2.ftz != 0) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); @@ -32,6 +35,8 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { h_and = instr.hsetp2.cbuf_and_imm.h_and; op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); + // F32 is hardcoded in hardware + op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); break; case OpCode::Id::HSETP2_IMM: cond = instr.hsetp2.cbuf_and_imm.cond; diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 95ec1cdd9..d2fe4ec5d 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -143,39 +143,37 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { } Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { - const auto offset{static_cast<std::size_t>(image.index.Value())}; - if (const auto image = TryUseExistingImage(offset, type)) { - return *image; + const auto offset = static_cast<u32>(image.index.Value()); + + const auto it = + std::find_if(std::begin(used_images), std::end(used_images), + [offset](const Image& entry) { return entry.GetOffset() == offset; }); + if (it != std::end(used_images)) { + ASSERT(!it->IsBindless() && it->GetType() == it->GetType()); + return *it; } - const std::size_t next_index{used_images.size()}; - return used_images.emplace(offset, Image{offset, next_index, type}).first->second; + const auto next_index = static_cast<u32>(used_images.size()); + return used_images.emplace_back(next_index, offset, type); } Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { - const Node image_register{GetRegister(reg)}; - const auto [base_image, cbuf_index, cbuf_offset]{ - TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; - const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; - - if (const auto image = TryUseExistingImage(cbuf_key, type)) { - return *image; - } - - const std::size_t next_index{used_images.size()}; - return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type}) - .first->second; -} - -Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type) { - auto it = used_images.find(offset); - if (it == used_images.end()) { - return nullptr; + const Node image_register = GetRegister(reg); + const auto [base_image, buffer, offset] = + TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); + + const auto it = + std::find_if(std::begin(used_images), std::end(used_images), + [buffer = buffer, offset = offset](const Image& entry) { + return entry.GetBuffer() == buffer && entry.GetOffset() == offset; + }); + if (it != std::end(used_images)) { + ASSERT(it->IsBindless() && it->GetType() == it->GetType()); + return *it; } - auto& image = it->second; - ASSERT(image.GetType() == type); - return ℑ + const auto next_index = static_cast<u32>(used_images.size()); + return used_images.emplace_back(next_index, offset, buffer, type); } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7923d4d69..335d78146 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -166,9 +166,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, instr, false); + TrackGlobalMemory(bb, instr, false); const u32 count = GetUniformTypeElementsCount(type); + if (!real_address_base || !base_address) { + // Tracking failed, load zeroes. + for (u32 i = 0; i < count; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); + } + break; + } + for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = @@ -260,22 +268,19 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, instr, true); - - // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} - SetTemporary(bb, 0, real_address_base); + TrackGlobalMemory(bb, instr, true); + if (!real_address_base || !base_address) { + // Tracking failed, skip the store. + break; + } const u32 count = GetUniformTypeElementsCount(type); for (u32 i = 0; i < count; ++i) { - SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); - } - for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); - const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - - bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); + const Node value = GetRegister(instr.gpr0.Value() + i); + bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } @@ -301,15 +306,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return pc; } -std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, - Instruction instr, - bool is_write) { +std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, + Instruction instr, + bool is_write) { const auto addr_register{GetRegister(instr.gmem.gpr)}; const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; const auto [base_address, index, offset] = TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - ASSERT(base_address != nullptr); + ASSERT_OR_EXECUTE_MSG(base_address != nullptr, + { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, + "Global memory tracking failed"); bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d46e0f823..17cd45d3c 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -67,7 +67,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::MOV_SYS: { - const Node value = [&]() { + const Node value = [this, instr] { switch (instr.sys20) { case SystemVariable::Ydirection: return Operation(OperationCode::YNegate); @@ -256,7 +256,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::DEPBAR: { - LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); + LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); break; } default: diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index f6ee68a54..d419e9c45 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -18,7 +18,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { const auto opcode = OpCode::Decode(instr); Node op_a = GetRegister(instr.gpr8); - Node op_b = [&]() { + Node op_b = [this, instr] { if (instr.is_b_imm) { return Immediate(instr.alu.GetSignedImm20_20()); } else if (instr.is_b_gpr) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b934a069..bb926a132 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -44,10 +44,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { bool is_bindless = false; switch (opcode->get().GetId()) { case OpCode::Id::TEX: { - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); - } - const TextureType texture_type{instr.tex.texture_type}; const bool is_array = instr.tex.array != 0; const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); @@ -62,10 +58,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); - } - const TextureType texture_type{instr.tex_b.texture_type}; const bool is_array = instr.tex_b.array != 0; const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); @@ -82,10 +74,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); const auto process_mode = instr.texs.GetTextureProcessMode(); - if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); - } - const Node4 components = GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); @@ -96,6 +84,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::TLD4_B: { + is_bindless = true; + [[fallthrough]]; + } case OpCode::Id::TLD4: { ASSERT(instr.tld4.array == 0); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), @@ -103,24 +95,20 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), "PTP is not implemented"); - if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); - } - const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); + const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) + : instr.tld4.UsesMiscMode(TextureMiscMode::DC); const bool is_array = instr.tld4.array != 0; - const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); + const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) + : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); WriteTexInstructionFloat( - bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi)); + bb, instr, + GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless)); break; } case OpCode::Id::TLD4S: { UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); - } const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); const Node op_a = GetRegister(instr.gpr8); @@ -141,7 +129,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); const auto& sampler = - GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); + GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -150,25 +138,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } - WriteTexsInstructionFloat(bb, instr, values); + WriteTexsInstructionFloat(bb, instr, values, true); break; } case OpCode::Id::TXQ_B: is_bindless = true; [[fallthrough]]; case OpCode::Id::TXQ: { - if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); - } - // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - is_bindless - ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, - false) - : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); u32 indexer = 0; switch (instr.txq.query_type) { @@ -201,15 +182,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), "NDV is not implemented"); - if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); - } - auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = is_bindless - ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) - : GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = + is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) + : GetSampler(instr.sampler, {{texture_type, is_array, false}}); std::vector<Node> coords; @@ -250,25 +227,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); - if (instr.tld.nodep_flag) { - LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete"); - } - WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); break; } case OpCode::Id::TLDS: { - const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; + const TextureType texture_type{instr.tlds.GetTextureType()}; const bool is_array{instr.tlds.IsArrayTexture()}; UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); - } - const Node4 components = GetTldsCode(instr, texture_type, is_array); if (instr.tlds.fp32_flag) { @@ -285,48 +254,84 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { return pc; } -const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, - bool is_array, bool is_shadow) { - const auto offset = static_cast<std::size_t>(sampler.index.Value()); +const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, + std::optional<SamplerInfo> sampler_info) { + const auto offset = static_cast<u32>(sampler.index.Value()); + + TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else if (const auto sampler = locker.ObtainBoundSampler(offset)) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + LOG_WARNING(HW_GPU, "Unknown sampler info"); + type = TextureType::Texture2D; + is_array = false; + is_shadow = false; + } // If this sampler has already been used, return the existing mapping. - const auto itr = + const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [&](const Sampler& entry) { return entry.GetOffset() == offset; }); - if (itr != used_samplers.end()) { - ASSERT(itr->GetType() == type && itr->IsArray() == is_array && - itr->IsShadow() == is_shadow); - return *itr; + [offset](const Sampler& entry) { return entry.GetOffset() == offset; }); + if (it != used_samplers.end()) { + ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && + it->IsShadow() == is_shadow); + return *it; } // Otherwise create a new mapping for this sampler - const std::size_t next_index = used_samplers.size(); - const Sampler entry{offset, next_index, type, is_array, is_shadow}; - return *used_samplers.emplace(entry).first; + const auto next_index = static_cast<u32>(used_samplers.size()); + return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow)); } -const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, - bool is_array, bool is_shadow) { +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, + std::optional<SamplerInfo> sampler_info) { const Node sampler_register = GetRegister(reg); - const auto [base_sampler, cbuf_index, cbuf_offset] = + const auto [base_sampler, buffer, offset] = TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); ASSERT(base_sampler != nullptr); - const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); + + TextureType type; + bool is_array; + bool is_shadow; + if (sampler_info) { + type = sampler_info->type; + is_array = sampler_info->is_array; + is_shadow = sampler_info->is_shadow; + } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) { + type = sampler->texture_type.Value(); + is_array = sampler->is_array.Value() != 0; + is_shadow = sampler->is_shadow.Value() != 0; + } else { + LOG_WARNING(HW_GPU, "Unknown sampler info"); + type = TextureType::Texture2D; + is_array = false; + is_shadow = false; + } // If this sampler has already been used, return the existing mapping. - const auto itr = + const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), - [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; }); - if (itr != used_samplers.end()) { - ASSERT(itr->GetType() == type && itr->IsArray() == is_array && - itr->IsShadow() == is_shadow); - return *itr; + [buffer = buffer, offset = offset](const Sampler& entry) { + return entry.GetBuffer() == buffer && entry.GetOffset() == offset; + }); + if (it != used_samplers.end()) { + ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && + it->IsShadow() == is_shadow); + return *it; } // Otherwise create a new mapping for this sampler - const std::size_t next_index = used_samplers.size(); - const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow}; - return *used_samplers.emplace(entry).first; + const auto next_index = static_cast<u32>(used_samplers.size()); + return used_samplers.emplace_back( + Sampler(next_index, offset, buffer, type, is_array, is_shadow)); } void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { @@ -344,14 +349,14 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const } } -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, - const Node4& components) { +void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, + bool ignore_mask) { // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 u32 dest_elem = 0; for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component)) + if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) continue; SetTemporary(bb, dest_elem++, components[component]); } @@ -411,9 +416,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = is_bindless - ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) - : GetSampler(instr.sampler, texture_type, is_array, is_shadow); + const auto& sampler = + is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) + : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || @@ -553,7 +558,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi) { + bool is_array, bool is_aoffi, bool is_bindless) { const std::size_t coord_count = GetCoordCount(texture_type); // If enabled arrays index is always stored in the gpr8 field @@ -567,6 +572,12 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de } u64 parameter_register = instr.gpr20.Value(); + + const auto& sampler = + is_bindless + ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}}) + : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); + std::vector<Node> aoffi; if (is_aoffi) { aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); @@ -577,12 +588,14 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de dc = GetRegister(parameter_register++); } - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) + : Immediate(static_cast<u32>(instr.tld4.component)); Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element}; + MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component, + element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -610,7 +623,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { @@ -646,7 +659,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is // When lod is used always is in gpr20 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; for (u32 element = 0; element < values.size(); ++element) { diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 97fc6f9b1..b047cf870 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -23,7 +23,7 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { const Node op_a = GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, instr.video.type_a, instr.video.byte_height_a); - const Node op_b = [&]() { + const Node op_b = [this, instr] { if (instr.video.use_register_b) { return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, instr.video.signed_b, instr.video.type_b, diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index a8e481b3c..d98d0e1dd 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation; using Tegra::Shader::VoteOperation; namespace { + OperationCode GetOperationCode(VoteOperation vote_op) { switch (vote_op) { case VoteOperation::All: @@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) { return OperationCode::VoteAll; } } + } // Anonymous namespace u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { @@ -48,47 +50,57 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { case OpCode::Id::SHFL: { Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) : GetRegister(instr.gpr39); - Node width = [&] { - // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has - // been done reversing Nvidia's math. It won't work on all cases due to SHFL having - // different parameters that don't properly map to GLSL's interface, but it should work - // for cases emitted by Nvidia's compiler. - if (instr.shfl.operation == ShuffleOperation::Up) { - return Operation( - OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), - Immediate(8)); - } else { - return Operation(OperationCode::ILogicalShiftRight, - Operation(OperationCode::IAdd, Immediate(0x201F), - Operation(OperationCode::INegate, std::move(mask))), - Immediate(8)); - } - }(); + Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) + : GetRegister(instr.gpr20); + + Node thread_id = Operation(OperationCode::ThreadId); + Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); + Node seg_mask = BitfieldExtract(mask, 8, 16); - const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { + Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); + Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); + Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, + Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); + + Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { switch (instr.shfl.operation) { case ShuffleOperation::Idx: - return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; - case ShuffleOperation::Up: - return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; + return Operation(OperationCode::IBitwiseOr, + Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), + min_thread_id); case ShuffleOperation::Down: - return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; + return Operation(OperationCode::IAdd, thread_id, index); + case ShuffleOperation::Up: + return Operation(OperationCode::IAdd, thread_id, + Operation(OperationCode::INegate, index)); case ShuffleOperation::Bfly: - return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; + return Operation(OperationCode::IBitwiseXor, thread_id, index); } - UNREACHABLE_MSG("Invalid SHFL operation: {}", - static_cast<u64>(instr.shfl.operation.Value())); - return {}; + UNREACHABLE(); + return Immediate(0U); }(); - // Setting the predicate before the register is intentional to avoid overwriting. - Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) - : GetRegister(instr.gpr20); - SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); + Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { + if (instr.shfl.operation == ShuffleOperation::Up) { + return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); + } else { + return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); + } + }(); + + SetPredicate(bb, instr.shfl.pred48, in_bounds); SetRegister( bb, instr.gpr0, - Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); + Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); + break; + } + case OpCode::Id::FSWZADD: { + UNIMPLEMENTED_IF(instr.fswzadd.ndv); + + Node op_a = GetRegister(instr.gpr8); + Node op_b = GetRegister(instr.gpr20); + Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); + SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); break; } default: diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp new file mode 100644 index 000000000..2647865d4 --- /dev/null +++ b/src/video_core/shader/expr.cpp @@ -0,0 +1,93 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <memory> +#include <variant> + +#include "video_core/shader/expr.h" + +namespace VideoCommon::Shader { +namespace { +bool ExprIsBoolean(const Expr& expr) { + return std::holds_alternative<ExprBoolean>(*expr); +} + +bool ExprBooleanGet(const Expr& expr) { + return std::get_if<ExprBoolean>(expr.get())->value; +} +} // Anonymous namespace + +bool ExprAnd::operator==(const ExprAnd& b) const { + return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); +} + +bool ExprAnd::operator!=(const ExprAnd& b) const { + return !operator==(b); +} + +bool ExprOr::operator==(const ExprOr& b) const { + return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); +} + +bool ExprOr::operator!=(const ExprOr& b) const { + return !operator==(b); +} + +bool ExprNot::operator==(const ExprNot& b) const { + return *operand1 == *b.operand1; +} + +bool ExprNot::operator!=(const ExprNot& b) const { + return !operator==(b); +} + +Expr MakeExprNot(Expr first) { + if (std::holds_alternative<ExprNot>(*first)) { + return std::get_if<ExprNot>(first.get())->operand1; + } + return MakeExpr<ExprNot>(std::move(first)); +} + +Expr MakeExprAnd(Expr first, Expr second) { + if (ExprIsBoolean(first)) { + return ExprBooleanGet(first) ? second : first; + } + if (ExprIsBoolean(second)) { + return ExprBooleanGet(second) ? first : second; + } + return MakeExpr<ExprAnd>(std::move(first), std::move(second)); +} + +Expr MakeExprOr(Expr first, Expr second) { + if (ExprIsBoolean(first)) { + return ExprBooleanGet(first) ? first : second; + } + if (ExprIsBoolean(second)) { + return ExprBooleanGet(second) ? second : first; + } + return MakeExpr<ExprOr>(std::move(first), std::move(second)); +} + +bool ExprAreEqual(const Expr& first, const Expr& second) { + return (*first) == (*second); +} + +bool ExprAreOpposite(const Expr& first, const Expr& second) { + if (std::holds_alternative<ExprNot>(*first)) { + return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second); + } + if (std::holds_alternative<ExprNot>(*second)) { + return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first); + } + return false; +} + +bool ExprIsTrue(const Expr& first) { + if (ExprIsBoolean(first)) { + return ExprBooleanGet(first); + } + return false; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h new file mode 100644 index 000000000..4e8264367 --- /dev/null +++ b/src/video_core/shader/expr.h @@ -0,0 +1,156 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <variant> + +#include "video_core/engines/shader_bytecode.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::ConditionCode; +using Tegra::Shader::Pred; + +class ExprAnd; +class ExprBoolean; +class ExprCondCode; +class ExprGprEqual; +class ExprNot; +class ExprOr; +class ExprPredicate; +class ExprVar; + +using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, + ExprBoolean, ExprGprEqual>; +using Expr = std::shared_ptr<ExprData>; + +class ExprAnd final { +public: + explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} + + bool operator==(const ExprAnd& b) const; + bool operator!=(const ExprAnd& b) const; + + Expr operand1; + Expr operand2; +}; + +class ExprOr final { +public: + explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} + + bool operator==(const ExprOr& b) const; + bool operator!=(const ExprOr& b) const; + + Expr operand1; + Expr operand2; +}; + +class ExprNot final { +public: + explicit ExprNot(Expr a) : operand1{std::move(a)} {} + + bool operator==(const ExprNot& b) const; + bool operator!=(const ExprNot& b) const; + + Expr operand1; +}; + +class ExprVar final { +public: + explicit ExprVar(u32 index) : var_index{index} {} + + bool operator==(const ExprVar& b) const { + return var_index == b.var_index; + } + + bool operator!=(const ExprVar& b) const { + return !operator==(b); + } + + u32 var_index; +}; + +class ExprPredicate final { +public: + explicit ExprPredicate(u32 predicate) : predicate{predicate} {} + + bool operator==(const ExprPredicate& b) const { + return predicate == b.predicate; + } + + bool operator!=(const ExprPredicate& b) const { + return !operator==(b); + } + + u32 predicate; +}; + +class ExprCondCode final { +public: + explicit ExprCondCode(ConditionCode cc) : cc{cc} {} + + bool operator==(const ExprCondCode& b) const { + return cc == b.cc; + } + + bool operator!=(const ExprCondCode& b) const { + return !operator==(b); + } + + ConditionCode cc; +}; + +class ExprBoolean final { +public: + explicit ExprBoolean(bool val) : value{val} {} + + bool operator==(const ExprBoolean& b) const { + return value == b.value; + } + + bool operator!=(const ExprBoolean& b) const { + return !operator==(b); + } + + bool value; +}; + +class ExprGprEqual final { +public: + ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {} + + bool operator==(const ExprGprEqual& b) const { + return gpr == b.gpr && value == b.value; + } + + bool operator!=(const ExprGprEqual& b) const { + return !operator==(b); + } + + u32 gpr; + u32 value; +}; + +template <typename T, typename... Args> +Expr MakeExpr(Args&&... args) { + static_assert(std::is_convertible_v<T, ExprData>); + return std::make_shared<ExprData>(T(std::forward<Args>(args)...)); +} + +bool ExprAreEqual(const Expr& first, const Expr& second); + +bool ExprAreOpposite(const Expr& first, const Expr& second); + +Expr MakeExprNot(Expr first); + +Expr MakeExprAnd(Expr first, Expr second); + +Expr MakeExprOr(Expr first, Expr second); + +bool ExprIsTrue(const Expr& first); + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 338bab17c..54217e6a4 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -47,6 +47,7 @@ enum class OperationCode { FTrunc, /// (MetaArithmetic, float a) -> float FCastInteger, /// (MetaArithmetic, int a) -> float FCastUInteger, /// (MetaArithmetic, uint a) -> float + FSwizzleAdd, /// (float a, float b, uint mask) -> float IAdd, /// (MetaArithmetic, int a, int b) -> int IMul, /// (MetaArithmetic, int a, int b) -> int @@ -181,15 +182,8 @@ enum class OperationCode { VoteAny, /// (bool) -> bool VoteEqual, /// (bool) -> bool - ShuffleIndexed, /// (uint value, uint index, uint width) -> uint - ShuffleUp, /// (uint value, uint index, uint width) -> uint - ShuffleDown, /// (uint value, uint index, uint width) -> uint - ShuffleButterfly, /// (uint value, uint index, uint width) -> uint - - InRangeShuffleIndexed, /// (uint index, uint width) -> bool - InRangeShuffleUp, /// (uint index, uint width) -> bool - InRangeShuffleDown, /// (uint index, uint width) -> bool - InRangeShuffleButterfly, /// (uint index, uint width) -> bool + ThreadId, /// () -> uint + ShuffleIndexed, /// (uint value, uint index) -> uint Amount, }; @@ -230,62 +224,49 @@ using NodeBlock = std::vector<Node>; class Sampler { public: /// This constructor is for bound samplers - explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_bindless{false} {} + constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow) + : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} /// This constructor is for bindless samplers - explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::TextureType type, bool is_array, bool is_shadow) - : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, - is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} - - /// This constructor is for serialization/deserialization - explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_bindless) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_bindless{is_bindless} {} - - std::size_t GetOffset() const { + constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow) + : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, + is_shadow{is_shadow}, is_bindless{true} {} + + constexpr u32 GetIndex() const { + return index; + } + + constexpr u32 GetOffset() const { return offset; } - std::size_t GetIndex() const { - return index; + constexpr u32 GetBuffer() const { + return buffer; } - Tegra::Shader::TextureType GetType() const { + constexpr Tegra::Shader::TextureType GetType() const { return type; } - bool IsArray() const { + constexpr bool IsArray() const { return is_array; } - bool IsShadow() const { + constexpr bool IsShadow() const { return is_shadow; } - bool IsBindless() const { + constexpr bool IsBindless() const { return is_bindless; } - std::pair<u32, u32> GetBindlessCBuf() const { - return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; - } - - bool operator<(const Sampler& rhs) const { - return std::tie(index, offset, type, is_array, is_shadow, is_bindless) < - std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow, - rhs.is_bindless); - } - private: - /// Offset in TSC memory from which to read the sampler object, as specified by the sampling - /// instruction. - std::size_t offset{}; - std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. + u32 index{}; ///< Emulated index given for the this sampler. + u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. + u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). + Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. @@ -294,18 +275,13 @@ private: class Image final { public: - constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type) - : offset{offset}, index{index}, type{type}, is_bindless{false} {} + /// This constructor is for bound images + constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type) + : index{index}, offset{offset}, type{type} {} - constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::ImageType type) - : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, - is_bindless{true} {} - - constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, - bool is_bindless, bool is_written, bool is_read, bool is_atomic) - : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, - is_written{is_written}, is_read{is_read}, is_atomic{is_atomic} {} + /// This constructor is for bindless samplers + constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type) + : index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {} void MarkWrite() { is_written = true; @@ -321,12 +297,16 @@ public: is_atomic = true; } - constexpr std::size_t GetOffset() const { + constexpr u32 GetIndex() const { + return index; + } + + constexpr u32 GetOffset() const { return offset; } - constexpr std::size_t GetIndex() const { - return index; + constexpr u32 GetBuffer() const { + return buffer; } constexpr Tegra::Shader::ImageType GetType() const { @@ -349,18 +329,11 @@ public: return is_atomic; } - constexpr std::pair<u32, u32> GetBindlessCBuf() const { - return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; - } - - constexpr bool operator<(const Image& rhs) const { - return std::tie(offset, index, type, is_bindless) < - std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless); - } - private: - u64 offset{}; - std::size_t index{}; + u32 index{}; + u32 offset{}; + u32 buffer{}; + Tegra::Shader::ImageType type{}; bool is_bindless{}; bool is_written{}; @@ -410,7 +383,7 @@ public: explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} explicit OperationNode(OperationCode code, Meta meta) - : OperationNode(code, meta, std::vector<Node>{}) {} + : OperationNode(code, std::move(meta), std::vector<Node>{}) {} explicit OperationNode(OperationCode code, std::vector<Node> operands) : OperationNode(code, Meta{}, std::move(operands)) {} diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 2c357f310..1d9825c76 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -2,8 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> +#include <array> #include <cmath> -#include <unordered_map> #include "common/assert.h" #include "common/common_types.h" @@ -22,8 +23,9 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; -ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size) - : program_code{program_code}, main_offset{main_offset}, program_size{size} { +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker) + : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { Decode(); } @@ -137,7 +139,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); } -Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { +Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { const Node node = MakeNode<InternalFlagNode>(flag); if (negated) { return Operation(OperationCode::LogicalNegate, node); @@ -269,21 +271,24 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { } Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { - const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { - {PredCondition::LessThan, OperationCode::LogicalFLessThan}, - {PredCondition::Equal, OperationCode::LogicalFEqual}, - {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, - {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, - {PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, - {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, - {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, - {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, - {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, - {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, - {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}}; - - const auto comparison{PredicateComparisonTable.find(condition)}; - UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), + static constexpr std::array comparison_table{ + std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan}, + std::pair{PredCondition::Equal, OperationCode::LogicalFEqual}, + std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, + std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, + std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, + std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, + std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, + std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, + std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, + std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, + std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}, + }; + + const auto comparison = + std::find_if(comparison_table.cbegin(), comparison_table.cend(), + [condition](const auto entry) { return condition == entry.first; }); + UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), "Unknown predicate comparison operation"); Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); @@ -304,21 +309,24 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, Node op_b) { - const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { - {PredCondition::LessThan, OperationCode::LogicalILessThan}, - {PredCondition::Equal, OperationCode::LogicalIEqual}, - {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, - {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, - {PredCondition::NotEqual, OperationCode::LogicalINotEqual}, - {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, - {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, - {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, - {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, - {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, - {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}}; - - const auto comparison{PredicateComparisonTable.find(condition)}; - UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), + static constexpr std::array comparison_table{ + std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan}, + std::pair{PredCondition::Equal, OperationCode::LogicalIEqual}, + std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual}, + std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, + std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual}, + std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, + std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, + std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, + std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, + std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, + std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}, + }; + + const auto comparison = + std::find_if(comparison_table.cbegin(), comparison_table.cend(), + [condition](const auto entry) { return condition == entry.first; }); + UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), "Unknown predicate comparison operation"); Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), @@ -335,45 +343,52 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b) { - const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { - {PredCondition::LessThan, OperationCode::Logical2HLessThan}, - {PredCondition::Equal, OperationCode::Logical2HEqual}, - {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, - {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, - {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, - {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, - {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, - {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, - {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, - {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, - {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}}; - - const auto comparison{PredicateComparisonTable.find(condition)}; - UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), + static constexpr std::array comparison_table{ + std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan}, + std::pair{PredCondition::Equal, OperationCode::Logical2HEqual}, + std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, + std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, + std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, + std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, + std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, + std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, + std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, + std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, + std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}, + }; + + const auto comparison = + std::find_if(comparison_table.cbegin(), comparison_table.cend(), + [condition](const auto entry) { return condition == entry.first; }); + UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), "Unknown predicate comparison operation"); return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); } OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { - const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { - {PredOperation::And, OperationCode::LogicalAnd}, - {PredOperation::Or, OperationCode::LogicalOr}, - {PredOperation::Xor, OperationCode::LogicalXor}, + static constexpr std::array operation_table{ + OperationCode::LogicalAnd, + OperationCode::LogicalOr, + OperationCode::LogicalXor, }; - const auto op = PredicateOperationTable.find(operation); - UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation"); - return op->second; + const auto index = static_cast<std::size_t>(operation); + if (index >= operation_table.size()) { + UNIMPLEMENTED_MSG("Unknown predicate operation."); + return {}; + } + + return operation_table[index]; } -Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) { +Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { switch (cc) { case Tegra::Shader::ConditionCode::NEU: return GetInternalFlag(InternalFlag::Zero, true); default: UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); - return GetPredicate(static_cast<u64>(Pred::NeverExecute)); + return MakeNode<PredicateNode>(Pred::NeverExecute, false); } } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 6f666ee30..76a849818 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <list> #include <map> #include <optional> #include <set> @@ -15,6 +16,9 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/shader/ast.h" +#include "video_core/shader/compiler_settings.h" +#include "video_core/shader/const_buffer_locker.h" #include "video_core/shader/node.h" namespace VideoCommon::Shader { @@ -45,7 +49,7 @@ public: } u32 GetSize() const { - return max_offset + sizeof(float); + return max_offset + static_cast<u32>(sizeof(float)); } u32 GetMaxOffset() const { @@ -64,7 +68,8 @@ struct GlobalMemoryUsage { class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size); + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, + ConstBufferLocker& locker); ~ShaderIR(); const std::map<u32, NodeBlock>& GetBasicBlocks() const { @@ -91,11 +96,11 @@ public: return used_cbufs; } - const std::set<Sampler>& GetSamplers() const { + const std::list<Sampler>& GetSamplers() const { return used_samplers; } - const std::map<u64, Image>& GetImages() const { + const std::list<Image>& GetImages() const { return used_images; } @@ -144,11 +149,38 @@ public: return disable_flow_stack; } - u32 ConvertAddressToNvidiaSpace(const u32 address) const { - return (address - main_offset) * sizeof(Tegra::Shader::Instruction); + bool IsDecompiled() const { + return decompiled; } + const ASTManager& GetASTManager() const { + return program_manager; + } + + ASTNode GetASTProgram() const { + return program_manager.GetProgram(); + } + + u32 GetASTNumVariables() const { + return program_manager.GetVariables(); + } + + u32 ConvertAddressToNvidiaSpace(u32 address) const { + return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); + } + + /// Returns a condition code evaluated from internal flags + Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; + private: + friend class ASTDecoder; + + struct SamplerInfo { + Tegra::Shader::TextureType type; + bool is_array; + bool is_shadow; + }; + void Decode(); NodeBlock DecodeRange(u32 begin, u32 end); @@ -213,7 +245,7 @@ private: /// Generates a node representing an output attribute. Keeps track of used attributes. Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); /// Generates a node representing an internal flag - Node GetInternalFlag(InternalFlag flag, bool negated = false); + Node GetInternalFlag(InternalFlag flag, bool negated = false) const; /// Generates a node representing a local memory address Node GetLocalMemory(Node address); /// Generates a node representing a shared memory address @@ -271,17 +303,13 @@ private: /// Returns a predicate combiner operation OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); - /// Returns a condition code evaluated from internal flags - Node GetConditionCode(Tegra::Shader::ConditionCode cc); - /// Accesses a texture sampler const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, - Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + std::optional<SamplerInfo> sampler_info); // Accesses a texture sampler for a bindless texture. const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, - Tegra::Shader::TextureType type, bool is_array, - bool is_shadow); + std::optional<SamplerInfo> sampler_info); /// Accesses an image. Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); @@ -289,9 +317,6 @@ private: /// Access a bindless image sampler. Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); - /// Tries to access an existing image, updating it's state as needed - Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type); - /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -302,7 +327,7 @@ private: const Node4& components); void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, - const Node4& components); + const Node4& components, bool ignore_mask = false); void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, const Node4& components); @@ -316,7 +341,7 @@ private: bool is_array); Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array, bool is_aoffi); + bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless); Node4 GetTldCode(Tegra::Shader::Instruction instr); @@ -351,12 +376,16 @@ private: std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor) const; - std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory( - NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); + std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, + Tegra::Shader::Instruction instr, + bool is_write); const ProgramCode& program_code; const u32 main_offset; - const std::size_t program_size; + const CompilerSettings settings; + ConstBufferLocker& locker; + + bool decompiled{}; bool disable_flow_stack{}; u32 coverage_begin{}; @@ -364,14 +393,15 @@ private: std::map<u32, NodeBlock> basic_blocks; NodeBlock global_code; + ASTManager program_manager{true, true}; std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; std::set<Tegra::Shader::Attribute::Index> used_input_attributes; std::set<Tegra::Shader::Attribute::Index> used_output_attributes; std::map<u32, ConstBuffer> used_cbufs; - std::set<Sampler> used_samplers; - std::map<u64, Image> used_images; + std::list<Sampler> used_samplers; + std::list<Image> used_images; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; bool uses_layer{}; |
