aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/ast.cpp753
-rw-r--r--src/video_core/shader/ast.h400
-rw-r--r--src/video_core/shader/compiler_settings.cpp26
-rw-r--r--src/video_core/shader/compiler_settings.h26
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp110
-rw-r--r--src/video_core/shader/const_buffer_locker.h80
-rw-r--r--src/video_core/shader/control_flow.cpp540
-rw-r--r--src/video_core/shader/control_flow.h77
-rw-r--r--src/video_core/shader/decode.cpp207
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp11
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp4
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp8
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp6
-rw-r--r--src/video_core/shader/decode/ffma.cpp4
-rw-r--r--src/video_core/shader/decode/half_set.cpp4
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp7
-rw-r--r--src/video_core/shader/decode/image.cpp50
-rw-r--r--src/video_core/shader/decode/memory.cpp39
-rw-r--r--src/video_core/shader/decode/other.cpp4
-rw-r--r--src/video_core/shader/decode/shift.cpp2
-rw-r--r--src/video_core/shader/decode/texture.cpp181
-rw-r--r--src/video_core/shader/decode/video.cpp2
-rw-r--r--src/video_core/shader/decode/warp.cpp74
-rw-r--r--src/video_core/shader/expr.cpp93
-rw-r--r--src/video_core/shader/expr.h156
-rw-r--r--src/video_core/shader/node.h115
-rw-r--r--src/video_core/shader/shader_ir.cpp131
-rw-r--r--src/video_core/shader/shader_ir.h76
28 files changed, 2646 insertions, 540 deletions
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
new file mode 100644
index 000000000..3f96d9076
--- /dev/null
+++ b/src/video_core/shader/ast.cpp
@@ -0,0 +1,753 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+#include <string_view>
+
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/shader/ast.h"
+#include "video_core/shader/expr.h"
+
+namespace VideoCommon::Shader {
+
+// Creates an empty zipper; `first` and `last` are value-initialised (null) in the class.
+ASTZipper::ASTZipper() = default;
+
+// Adopts an already linked chain of nodes that starts at new_first.
+// Every node reachable through `next` is assigned this zipper as manager and
+// `parent` as parent; `last` ends up on the final node of the chain.
+// new_first must be non-null and must not be managed by any zipper.
+void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) {
+    ASSERT(new_first->manager == nullptr);
+    first = new_first;
+    last = new_first;
+
+    for (ASTNode node = first; node; node = node->next) {
+        node->manager = this;
+        node->parent = parent;
+        last = node;
+    }
+}
+
+// Appends new_node at the end of the list.
+// new_node must not be managed by any zipper. Its parent is left untouched.
+void ASTZipper::PushBack(const ASTNode new_node) {
+    ASSERT(new_node->manager == nullptr);
+    const ASTNode old_tail = last;
+    new_node->previous = old_tail;
+    new_node->next.reset();
+    if (old_tail) {
+        old_tail->next = new_node;
+    }
+    if (!first) {
+        // The list was empty, so the new node is also the head.
+        first = new_node;
+    }
+    last = new_node;
+    new_node->manager = this;
+}
+
+// Inserts new_node at the head of the list.
+// new_node must not be managed by any zipper. Its parent is left untouched.
+void ASTZipper::PushFront(const ASTNode new_node) {
+    ASSERT(new_node->manager == nullptr);
+    new_node->previous.reset();
+    new_node->next = first;
+    if (first) {
+        first->previous = new_node;
+    }
+    // Only an empty list gets a new tail. The former `last == first` test was
+    // also true for a single-element list and moved `last` onto the new head,
+    // leaving the real tail unreachable through `last`.
+    if (!last) {
+        last = new_node;
+    }
+    first = new_node;
+    new_node->manager = this;
+}
+
+// Links new_node directly behind at_node. A null at_node means "insert at the
+// head" and is forwarded to PushFront().
+// new_node must not be managed by any zipper. Its parent is left untouched.
+void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) {
+    ASSERT(new_node->manager == nullptr);
+    if (!at_node) {
+        PushFront(new_node);
+        return;
+    }
+    const ASTNode successor = at_node->next;
+    new_node->previous = at_node;
+    new_node->next = successor;
+    at_node->next = new_node;
+    if (successor) {
+        successor->previous = new_node;
+    }
+    if (at_node == last) {
+        last = new_node;
+    }
+    new_node->manager = this;
+}
+
+// Links new_node directly in front of at_node. A null at_node means "insert at
+// the tail" and is forwarded to PushBack().
+// new_node must not be managed by any zipper. Its parent is left untouched.
+void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) {
+    ASSERT(new_node->manager == nullptr);
+    if (!at_node) {
+        PushBack(new_node);
+        return;
+    }
+    const ASTNode predecessor = at_node->previous;
+    new_node->next = at_node;
+    new_node->previous = predecessor;
+    at_node->previous = new_node;
+    if (predecessor) {
+        predecessor->next = new_node;
+    }
+    if (at_node == first) {
+        first = new_node;
+    }
+    new_node->manager = this;
+}
+
+// Cuts the list in front of `node` and releases `node` together with all of its
+// successors: the detached chain stays linked internally, but every node in it
+// loses its manager and parent so it can be adopted by another zipper.
+void ASTZipper::DetachTail(ASTNode node) {
+    ASSERT(node->manager == this);
+    if (node == first) {
+        // The whole list is being detached.
+        first.reset();
+        last.reset();
+    } else {
+        last = node->previous;
+        last->next.reset();
+        node->previous.reset();
+    }
+
+    // This must also run for the `node == first` case. The previous early return
+    // left `manager` pointing at this zipper, which trips the assertion in
+    // Init() as soon as the detached chain is handed to a new zipper.
+    ASTNode current = std::move(node);
+    while (current) {
+        current->manager = nullptr;
+        current->parent.reset();
+        current = current->next;
+    }
+}
+
+// Unlinks the inclusive range [start, end] from the list. The range stays linked
+// internally; each of its nodes loses manager and parent. Both nodes must be
+// managed by this zipper and `end` must be reachable from `start` via `next`.
+void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) {
+    ASSERT(start->manager == this && end->manager == this);
+    if (start == end) {
+        DetachSingle(start);
+        return;
+    }
+    const ASTNode before = start->previous;
+    const ASTNode after = end->next;
+    // Close the gap, updating head/tail when the range touches either end.
+    if (before) {
+        before->next = after;
+    } else {
+        first = after;
+    }
+    if (after) {
+        after->previous = before;
+    } else {
+        last = before;
+    }
+    start->previous.reset();
+    end->next.reset();
+
+    bool reached_end = false;
+    for (ASTNode node = start; node; node = node->next) {
+        node->manager = nullptr;
+        node->parent.reset();
+        if (node == end) {
+            reached_end = true;
+        }
+    }
+    ASSERT(reached_end);
+}
+
+// Unlinks exactly one node from the list and fully isolates it: sibling links,
+// manager and parent are all cleared.
+void ASTZipper::DetachSingle(const ASTNode node) {
+    ASSERT(node->manager == this);
+    const ASTNode before = node->previous;
+    const ASTNode after = node->next;
+
+    if (before) {
+        before->next = after;
+    } else {
+        first = after;
+    }
+    if (after) {
+        after->previous = before;
+    } else {
+        last = before;
+    }
+
+    node->previous.reset();
+    node->next.reset();
+    node->parent.reset();
+    node->manager = nullptr;
+}
+
+// Takes `node` out of the list and clears its manager and parent.
+// Unlike DetachSingle(), the node's own `next`/`previous` links are not reset.
+void ASTZipper::Remove(const ASTNode node) {
+    ASSERT(node->manager == this);
+    const ASTNode successor = node->next;
+    const ASTNode predecessor = node->previous;
+
+    if (node == first) {
+        first = successor;
+    }
+    if (node == last) {
+        last = predecessor;
+    }
+    if (predecessor) {
+        predecessor->next = successor;
+    }
+    if (successor) {
+        successor->previous = predecessor;
+    }
+
+    node->manager = nullptr;
+    node->parent.reset();
+}
+
+// std::visit visitor that renders a condition expression as text.
+// The text accumulates across calls and is read back with GetResult().
+class ExprPrinter final {
+public:
+    void operator()(const ExprAnd& expr) {
+        AppendBinary(*expr.operand1, " && ", *expr.operand2);
+    }
+
+    void operator()(const ExprOr& expr) {
+        AppendBinary(*expr.operand1, " || ", *expr.operand2);
+    }
+
+    void operator()(const ExprNot& expr) {
+        inner += '!';
+        std::visit(*this, *expr.operand1);
+    }
+
+    void operator()(const ExprPredicate& expr) {
+        inner += 'P';
+        inner += std::to_string(expr.predicate);
+    }
+
+    void operator()(const ExprCondCode& expr) {
+        inner += "CC";
+        inner += std::to_string(static_cast<u32>(expr.cc));
+    }
+
+    void operator()(const ExprVar& expr) {
+        inner += 'V';
+        inner += std::to_string(expr.var_index);
+    }
+
+    void operator()(const ExprBoolean& expr) {
+        if (expr.value) {
+            inner += "true";
+        } else {
+            inner += "false";
+        }
+    }
+
+    void operator()(const ExprGprEqual& expr) {
+        inner += "( gpr_";
+        inner += std::to_string(expr.gpr);
+        inner += " == ";
+        inner += std::to_string(expr.value);
+        inner += ')';
+    }
+
+    const std::string& GetResult() const {
+        return inner;
+    }
+
+private:
+    // Emits "( <lhs><op><rhs>)", recursing into both operands.
+    template <typename Operand>
+    void AppendBinary(const Operand& lhs, const char* op, const Operand& rhs) {
+        inner += "( ";
+        std::visit(*this, lhs);
+        inner += op;
+        std::visit(*this, rhs);
+        inner += ')';
+    }
+
+    std::string inner;
+};
+
+// std::visit visitor that pretty-prints an AST as pseudo code.
+// Output accumulates in `inner`; `scope` tracks the current nesting depth and
+// Indent() turns it into two spaces of leading whitespace per level.
+class ASTPrinter {
+public:
+ // Root node: prints every top-level statement between "program {" and "}".
+ void operator()(const ASTProgram& ast) {
+ scope++;
+ inner += "program {\n";
+ ASTNode current = ast.nodes.GetFirst();
+ while (current) {
+ Visit(current);
+ current = current->GetNext();
+ }
+ inner += "}\n";
+ scope--;
+ }
+
+ // "if (<condition>) { ... }" with the children printed one level deeper.
+ void operator()(const ASTIfThen& ast) {
+ ExprPrinter expr_parser{};
+ std::visit(expr_parser, *ast.condition);
+ inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult());
+ scope++;
+ ASTNode current = ast.nodes.GetFirst();
+ while (current) {
+ Visit(current);
+ current = current->GetNext();
+ }
+ scope--;
+ inner += fmt::format("{}}}\n", Indent());
+ }
+
+ // "else { ... }"; the node carries no condition of its own.
+ void operator()(const ASTIfElse& ast) {
+ inner += Indent();
+ inner += "else {\n";
+
+ scope++;
+ ASTNode current = ast.nodes.GetFirst();
+ while (current) {
+ Visit(current);
+ current = current->GetNext();
+ }
+ scope--;
+
+ inner += Indent();
+ inner += "}\n";
+ }
+
+ // Not yet decoded block: prints its start and end values.
+ void operator()(const ASTBlockEncoded& ast) {
+ inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
+ }
+
+ // Decoded block: the contained nodes are not printed.
+ void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
+ inner += Indent();
+ inner += "Block;\n";
+ }
+
+ // "V<index> := <condition>;"
+ void operator()(const ASTVarSet& ast) {
+ ExprPrinter expr_parser{};
+ std::visit(expr_parser, *ast.condition);
+ inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult());
+ }
+
+ // Labels are printed without indentation.
+ void operator()(const ASTLabel& ast) {
+ inner += fmt::format("Label_{}:\n", ast.index);
+ }
+
+ // "(<condition>) -> goto Label_<n>;"
+ void operator()(const ASTGoto& ast) {
+ ExprPrinter expr_parser{};
+ std::visit(expr_parser, *ast.condition);
+ inner +=
+ fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label);
+ }
+
+ // "do { ... } while (<condition>);" with the body printed one level deeper.
+ void operator()(const ASTDoWhile& ast) {
+ ExprPrinter expr_parser{};
+ std::visit(expr_parser, *ast.condition);
+ inner += fmt::format("{}do {{\n", Indent());
+ scope++;
+ ASTNode current = ast.nodes.GetFirst();
+ while (current) {
+ Visit(current);
+ current = current->GetNext();
+ }
+ scope--;
+ inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult());
+ }
+
+ // "(<condition>) -> discard;" when `kills` is set, "-> exit;" otherwise.
+ void operator()(const ASTReturn& ast) {
+ ExprPrinter expr_parser{};
+ std::visit(expr_parser, *ast.condition);
+ inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(),
+ ast.kills ? "discard" : "exit");
+ }
+
+ // "(<condition>) -> break;"
+ void operator()(const ASTBreak& ast) {
+ ExprPrinter expr_parser{};
+ std::visit(expr_parser, *ast.condition);
+ inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult());
+ }
+
+ // Dispatches on the payload type of `node`. `node` must be non-null.
+ void Visit(const ASTNode& node) {
+ std::visit(*this, *node->GetInnerData());
+ }
+
+ // Text produced so far.
+ const std::string& GetResult() const {
+ return inner;
+ }
+
+private:
+ // Returns a view of 2 * scope characters taken from `spaces`. The last view is
+ // cached and reused while the scope does not change.
+ std::string_view Indent() {
+ if (space_segment_scope == scope) {
+ return space_segment;
+ }
+
+ // Ensure that we don't exceed our view.
+ ASSERT(scope * 2 < spaces.size());
+
+ space_segment = spaces.substr(0, scope * 2);
+ space_segment_scope = scope;
+ return space_segment;
+ }
+
+ std::string inner{};
+ std::string_view space_segment;
+
+ u32 scope{};
+ u32 space_segment_scope{};
+
+ // NOTE(review): this literal appears whitespace-collapsed in this rendering of
+ // the diff. It must be longer than 2 * (deepest nesting level) for the assertion
+ // in Indent() to hold - confirm against the real source file.
+ static constexpr std::string_view spaces{" "};
+};
+
+// Renders the whole tree, starting at main_node, as pseudo code.
+// main_node must be set (see Init()).
+std::string ASTManager::Print() const {
+    ASTPrinter printer;
+    printer.Visit(main_node);
+    std::string text = printer.GetResult();
+    return text;
+}
+
+// Stores the two decompilation options. The tree itself is created by Init().
+// full_decompile: when false, Decompile() only processes backward jumps.
+// disable_else_derivation: when true, EncloseIfThen() never emits ASTIfElse nodes.
+ASTManager::ASTManager(bool full_decompile, bool disable_else_derivation)
+    : full_decompile{full_decompile}, disable_else_derivation{disable_else_derivation} {}
+
+// Tears the tree down through Clear().
+ASTManager::~ASTManager() {
+ Clear();
+}
+
+// Creates the parentless root program node, caches a pointer to its ASTProgram
+// payload and builds the shared constant-false expression.
+void ASTManager::Init() {
+    false_condition = MakeExpr<ExprBoolean>(false);
+    main_node = ASTBase::Make<ASTProgram>(ASTNode{});
+    program = std::get_if<ASTProgram>(main_node->GetInnerData());
+}
+
+// Reserves a label index for `address` unless one already exists, and grows
+// `labels` so that the new index has a (still null) slot.
+void ASTManager::DeclareLabel(u32 address) {
+    const bool is_new = labels_map.emplace(address, labels_count).second;
+    if (!is_new) {
+        return;
+    }
+    ++labels_count;
+    labels.resize(labels_count);
+}
+
+// Appends a label node for `address` to the top-level program and records it in
+// `labels`. The address is looked up with operator[], so an address that was
+// never passed to DeclareLabel() is silently mapped to index 0.
+void ASTManager::InsertLabel(u32 address) {
+    const u32 label_index = labels_map[address];
+    const ASTNode label_node = ASTBase::Make<ASTLabel>(main_node, label_index);
+    labels[label_index] = label_node;
+    program->nodes.PushBack(label_node);
+}
+
+// Appends a conditional goto targeting the label of `address` to the top-level
+// program and remembers it in `gotos` for Decompile(). The address is looked up
+// with operator[], so an undeclared address is silently mapped to index 0.
+void ASTManager::InsertGoto(Expr condition, u32 address) {
+    const u32 label_index = labels_map[address];
+    const ASTNode jump = ASTBase::Make<ASTGoto>(main_node, std::move(condition), label_index);
+    gotos.push_back(jump);
+    program->nodes.PushBack(jump);
+}
+
+// Appends an encoded block carrying [start_address, end_address] to the
+// top-level program.
+void ASTManager::InsertBlock(u32 start_address, u32 end_address) {
+    program->nodes.PushBack(
+        ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address));
+}
+
+// Appends a conditional return to the top-level program. `kills` selects
+// between what the printer shows as "discard" (true) and "exit" (false).
+void ASTManager::InsertReturn(Expr condition, bool kills) {
+    program->nodes.PushBack(ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills));
+}
+
+// The decompile algorithm is based on
+// "Taming control flow: A structured approach to eliminating goto statements"
+// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be
+// on the same structured level as the label which they jump to. This is done
+// through outward/inward movements and lifting. Once they are at the same
+// level, you can enclose them in an "if" structure or a "do-while" structure.
+//
+// Every goto that ends up next to its label (same parent) is replaced by a
+// do-while (label precedes the goto) or an if-then (label follows the goto) and
+// dropped from `gotos`. Without full_decompile only backward jumps are handled.
+// Afterwards, full_decompile removes all label nodes from the tree; otherwise
+// labels that no remaining goto targets are marked unused.
+void ASTManager::Decompile() {
+    auto it = gotos.begin();
+    while (it != gotos.end()) {
+        const ASTNode goto_node = *it;
+        const auto label_index = goto_node->GetGotoLabel();
+        if (!label_index) {
+            return;
+        }
+        const ASTNode label = labels[*label_index];
+        if (!full_decompile) {
+            // We only decompile backward jumps
+            if (!IsBackwardsJump(goto_node, label)) {
+                ++it;
+                continue;
+            }
+        }
+        if (IndirectlyRelated(goto_node, label)) {
+            while (!DirectlyRelated(goto_node, label)) {
+                MoveOutward(goto_node);
+            }
+        }
+        if (DirectlyRelated(goto_node, label)) {
+            u32 goto_level = goto_node->GetLevel();
+            const u32 label_level = label->GetLevel();
+            while (label_level < goto_level) {
+                MoveOutward(goto_node);
+                goto_level--;
+            }
+            // TODO(Blinkhawk): Implement Lifting and Inward Movements
+        }
+        if (label->GetParent() == goto_node->GetParent()) {
+            // The label precedes the goto => backward jump => loop.
+            bool is_loop = false;
+            ASTNode current = goto_node->GetPrevious();
+            while (current) {
+                if (current == label) {
+                    is_loop = true;
+                    break;
+                }
+                current = current->GetPrevious();
+            }
+
+            if (is_loop) {
+                EncloseDoWhile(goto_node, label);
+            } else {
+                EncloseIfThen(goto_node, label);
+            }
+            it = gotos.erase(it);
+            continue;
+        }
+        ++it;
+    }
+    if (full_decompile) {
+        for (const ASTNode& label : labels) {
+            auto& manager = label->GetManager();
+            manager.Remove(label);
+        }
+        labels.clear();
+    } else {
+        // The previous iterator-based loop never advanced its iterator and so
+        // never terminated once `labels` was non-empty; a range-for visits each
+        // label exactly once.
+        for (const ASTNode& label : labels) {
+            bool can_remove = true;
+            for (const ASTNode& goto_node : gotos) {
+                const auto label_index = goto_node->GetGotoLabel();
+                if (!label_index) {
+                    return;
+                }
+                if (labels[*label_index] == label) {
+                    can_remove = false;
+                    break;
+                }
+            }
+            if (can_remove) {
+                label->MarkLabelUnused();
+            }
+        }
+    }
+}
+
+// Reports whether the label is located before the goto. Both nodes are first
+// lifted to their ancestors that share a parent; the label then counts as
+// "before" when it is found among the earlier siblings of the goto side.
+bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const {
+    u32 goto_level = goto_node->GetLevel();
+    u32 label_level = label_node->GetLevel();
+    // Bring the deeper of the two nodes up to the depth of the other one.
+    for (; goto_level > label_level; --goto_level) {
+        goto_node = goto_node->GetParent();
+    }
+    for (; label_level > goto_level; --label_level) {
+        label_node = label_node->GetParent();
+    }
+    // Climb in lockstep until both share a parent.
+    while (goto_node->GetParent() != label_node->GetParent()) {
+        goto_node = goto_node->GetParent();
+        label_node = label_node->GetParent();
+    }
+    for (ASTNode sibling = goto_node->GetPrevious(); sibling; sibling = sibling->GetPrevious()) {
+        if (sibling == label_node) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// True when the two nodes are neither siblings (same parent) nor directly
+// related in the sense of DirectlyRelated().
+bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const {
+    const bool are_siblings = first->GetParent() == second->GetParent();
+    return !are_siblings && !DirectlyRelated(first, second);
+}
+
+// True when the nodes are not siblings, but lifting the deeper node up to the
+// depth of the shallower one yields a sibling of the shallower node.
+bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const {
+    if (first->GetParent() == second->GetParent()) {
+        return false;
+    }
+    const u32 first_level = first->GetLevel();
+    const u32 second_level = second->GetLevel();
+    const bool first_is_deeper = first_level > second_level;
+
+    const ASTNode shallow = first_is_deeper ? second : first;
+    ASTNode deep = first_is_deeper ? first : second;
+    const u32 shallow_level = first_is_deeper ? second_level : first_level;
+    u32 deep_level = first_is_deeper ? first_level : second_level;
+
+    for (; deep_level > shallow_level; --deep_level) {
+        deep = deep->GetParent();
+    }
+
+    return shallow->GetParent() == deep->GetParent();
+}
+
+// Debug helper: logs the printed tree under the caption `state` at critical
+// level and then runs SanityCheck().
+void ASTManager::ShowCurrentState(std::string_view state) const {
+ LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print());
+ SanityCheck();
+}
+
+// Logs one critical message for every recorded label that has no parent.
+// Entries of `labels` are dereferenced without a null check.
+void ASTManager::SanityCheck() const {
+    for (const auto& label : labels) {
+        if (label->GetParent()) {
+            continue;
+        }
+        LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
+    }
+}
+
+// Replaces a backward goto by a do-while loop. Expects `label` and `goto_node`
+// to be in the same list with the label located before the goto.
+void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) {
+ ASTZipper& zipper = goto_node->GetManager();
+ const ASTNode loop_start = label->GetNext();
+ // Nothing lies between label and goto: the goto is simply dropped.
+ if (loop_start == goto_node) {
+ zipper.Remove(goto_node);
+ return;
+ }
+ const ASTNode parent = label->GetParent();
+ const Expr condition = goto_node->GetGotoCondition();
+ // Cut out everything from the node after the label up to and including the goto...
+ zipper.DetachSegment(loop_start, goto_node);
+ // ...and make it the body of a new do-while that repeats on the goto condition.
+ const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition);
+ ASTZipper* sub_zipper = do_while_node->GetSubNodes();
+ sub_zipper->Init(loop_start, do_while_node);
+ zipper.InsertAfter(do_while_node, label);
+ // The goto itself is no longer needed inside the loop body.
+ sub_zipper->Remove(goto_node);
+}
+
+// Replaces a forward goto by a conditional block. Expects `goto_node` and
+// `label` to be in the same list with the goto located before the label.
+// The nodes between them become the body of an if-then guarded by the negated
+// goto condition. When else derivation is enabled and the goto directly follows
+// an if-then with an equal condition, an else block is emitted instead.
+void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) {
+    ASTZipper& zipper = goto_node->GetManager();
+    const ASTNode if_end = label->GetPrevious();
+    // Nothing lies between goto and label: the goto is simply dropped.
+    if (if_end == goto_node) {
+        zipper.Remove(goto_node);
+        return;
+    }
+    const ASTNode prev = goto_node->GetPrevious();
+    const Expr condition = goto_node->GetGotoCondition();
+    bool do_else = false;
+    // `prev` is null when the goto is the first node of its list, so it has to
+    // be checked before it is dereferenced.
+    if (!disable_else_derivation && prev && prev->IsIfThen()) {
+        const Expr if_condition = prev->GetIfCondition();
+        do_else = ExprAreEqual(if_condition, condition);
+    }
+    const ASTNode parent = label->GetParent();
+    zipper.DetachSegment(goto_node, if_end);
+    ASTNode if_node;
+    if (do_else) {
+        if_node = ASTBase::Make<ASTIfElse>(parent);
+    } else {
+        Expr neg_condition = MakeExprNot(condition);
+        if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition);
+    }
+    ASTZipper* sub_zipper = if_node->GetSubNodes();
+    sub_zipper->Init(goto_node, if_node);
+    // A null `prev` makes InsertAfter() place the block at the head of the list.
+    zipper.InsertAfter(if_node, prev);
+    // The goto itself is no longer needed inside the new block.
+    sub_zipper->Remove(goto_node);
+}
+
+// Moves a goto one nesting level outward, out of the loop / if / else that
+// currently contains it. The goto condition is captured in a fresh variable at
+// the goto's old position, and the goto, now conditioned on that variable, is
+// re-inserted behind the enclosing construct in the outer list.
+// The goto must have a parent that is a do-while, if-then or if-else node.
+void ASTManager::MoveOutward(ASTNode goto_node) {
+ // `zipper` is the list the goto currently lives in, `zipper2` the outer list
+ // that contains the enclosing construct.
+ ASTZipper& zipper = goto_node->GetManager();
+ const ASTNode parent = goto_node->GetParent();
+ ASTZipper& zipper2 = parent->GetManager();
+ const ASTNode grandpa = parent->GetParent();
+ const bool is_loop = parent->IsLoop();
+ const bool is_else = parent->IsIfElse();
+ const bool is_if = parent->IsIfThen();
+
+ // Neighbours have to be read before the goto is detached.
+ const ASTNode prev = goto_node->GetPrevious();
+ const ASTNode post = goto_node->GetNext();
+
+ const Expr condition = goto_node->GetGotoCondition();
+ zipper.DetachSingle(goto_node);
+ if (is_loop) {
+ // Loop: "V := false" goes in front of the loop; inside the loop the goto is
+ // replaced by "V := condition" followed by a break on V.
+ // NOTE(review): var_node_init is created with `parent` as its parent although
+ // it is linked into the outer list (zipper2) - confirm whether `grandpa` was meant.
+ const u32 var_index = NewVariable();
+ const Expr var_condition = MakeExpr<ExprVar>(var_index);
+ const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
+ const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
+ zipper2.InsertBefore(var_node_init, parent);
+ zipper.InsertAfter(var_node, prev);
+ goto_node->SetGotoCondition(var_condition);
+ const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition);
+ zipper.InsertAfter(break_node, var_node);
+ } else if (is_if || is_else) {
+ const u32 var_index = NewVariable();
+ const Expr var_condition = MakeExpr<ExprVar>(var_index);
+ const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
+ const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
+ if (is_if) {
+ zipper2.InsertBefore(var_node_init, parent);
+ } else {
+ // For an else block the initialisation goes in front of the node preceding it
+ // (presumably the matching if-then - verify).
+ zipper2.InsertBefore(var_node_init, parent->GetPrevious());
+ }
+ zipper.InsertAfter(var_node, prev);
+ goto_node->SetGotoCondition(var_condition);
+ // Whatever followed the goto is wrapped into "if (!V) { ... }".
+ if (post) {
+ zipper.DetachTail(post);
+ const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition));
+ ASTZipper* sub_zipper = if_node->GetSubNodes();
+ sub_zipper->Init(post, if_node);
+ zipper.InsertAfter(if_node, var_node);
+ }
+ } else {
+ UNREACHABLE();
+ }
+ // Re-insert the goto in the outer list: behind the else block when the if-then
+ // it left is directly followed by one, otherwise right behind the construct it left.
+ const ASTNode next = parent->GetNext();
+ if (is_if && next && next->IsIfElse()) {
+ zipper2.InsertAfter(goto_node, next);
+ goto_node->SetParent(grandpa);
+ return;
+ }
+ zipper2.InsertAfter(goto_node, parent);
+ goto_node->SetParent(grandpa);
+}
+
+// std::visit visitor that dismantles an AST: it walks every node, empties
+// decoded blocks and resets each node's parent/sibling links through
+// ASTBase::Clear() so that the shared_ptr links between nodes are dropped.
+class ASTClearer {
+public:
+    ASTClearer() = default;
+
+    void operator()(const ASTProgram& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()(const ASTIfThen& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()(const ASTIfElse& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {}
+
+    void operator()(ASTBlockDecoded& ast) {
+        ast.nodes.clear();
+    }
+
+    void operator()([[maybe_unused]] const ASTVarSet& ast) {}
+
+    void operator()([[maybe_unused]] const ASTLabel& ast) {}
+
+    void operator()([[maybe_unused]] const ASTGoto& ast) {}
+
+    void operator()(const ASTDoWhile& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()([[maybe_unused]] const ASTReturn& ast) {}
+
+    void operator()([[maybe_unused]] const ASTBreak& ast) {}
+
+    // Clears the subtree below `node` first, then the links of `node` itself.
+    void Visit(const ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+        node->Clear();
+    }
+
+private:
+    // Visits every node of a sibling list. The successor has to be fetched
+    // BEFORE Visit(), because Visit() ends in ASTBase::Clear(), which resets the
+    // node's `next` link. Reading GetNext() afterwards, as the per-type loops
+    // used to do, stopped after the first child and left the rest linked.
+    void ClearChildren(const ASTZipper& zipper) {
+        ASTNode current = zipper.GetFirst();
+        while (current) {
+            const ASTNode next = current->GetNext();
+            Visit(current);
+            current = next;
+        }
+    }
+};
+
+// Dismantles the tree and forgets all labels and gotos. Does nothing when no
+// tree exists. `labels_count`, `variables` and `false_condition` keep their
+// values.
+void ASTManager::Clear() {
+    if (main_node == nullptr) {
+        return;
+    }
+    ASTClearer tree_clearer;
+    tree_clearer.Visit(main_node);
+    main_node.reset();
+    program = nullptr;
+
+    gotos.clear();
+    labels.clear();
+    labels_map.clear();
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
new file mode 100644
index 000000000..a2f0044ba
--- /dev/null
+++ b/src/video_core/shader/ast.h
@@ -0,0 +1,400 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <functional>
+#include <list>
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "video_core/shader/expr.h"
+#include "video_core/shader/node.h"
+
+namespace VideoCommon::Shader {
+
+class ASTBase;
+class ASTBlockDecoded;
+class ASTBlockEncoded;
+class ASTBreak;
+class ASTDoWhile;
+class ASTGoto;
+class ASTIfElse;
+class ASTIfThen;
+class ASTLabel;
+class ASTProgram;
+class ASTReturn;
+class ASTVarSet;
+
+using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
+ ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;
+
+using ASTNode = std::shared_ptr<ASTBase>;
+
+// Kinds of node that own a child list.
+// NOTE(review): not referenced anywhere in the visible part of this change.
+enum class ASTZipperType : u32 {
+ Program,
+ IfThen,
+ IfElse,
+ Loop,
+};
+
+// Doubly linked list of sibling AST nodes. The list stores only its head and
+// tail; the `next`/`previous` links live in the nodes themselves, and every
+// linked node records the zipper it belongs to in its `manager` field.
+class ASTZipper final {
+public:
+ explicit ASTZipper();
+
+ // Adopts an existing chain starting at `first` and sets `parent` on all of it.
+ void Init(ASTNode first, ASTNode parent);
+
+ // Head of the list, null when empty.
+ ASTNode GetFirst() const {
+ return first;
+ }
+
+ // Tail of the list, null when empty.
+ ASTNode GetLast() const {
+ return last;
+ }
+
+ // Insertion: new_node must not belong to any zipper. A null at_node makes
+ // InsertAfter act as PushFront and InsertBefore act as PushBack.
+ void PushBack(ASTNode new_node);
+ void PushFront(ASTNode new_node);
+ void InsertAfter(ASTNode new_node, ASTNode at_node);
+ void InsertBefore(ASTNode new_node, ASTNode at_node);
+ // Removal: DetachTail releases `node` and everything after it, DetachSegment
+ // the inclusive range [start, end], DetachSingle and Remove a single node.
+ void DetachTail(ASTNode node);
+ void DetachSingle(ASTNode node);
+ void DetachSegment(ASTNode start, ASTNode end);
+ void Remove(ASTNode node);
+
+ ASTNode first{};
+ ASTNode last{};
+};
+
+// Payload of the root node: the top-level statement list of the program.
+class ASTProgram {
+public:
+ ASTZipper nodes{};
+};
+
+// Payload of an "if (condition) { nodes }" construct.
+class ASTIfThen {
+public:
+ explicit ASTIfThen(Expr condition) : condition{std::move(condition)} {}
+ Expr condition;
+ ASTZipper nodes{};
+};
+
+// Payload of an "else { nodes }" construct; it stores no condition of its own.
+class ASTIfElse {
+public:
+ ASTZipper nodes{};
+};
+
+// Payload of a block that is still described only by its start and end address
+// (see ASTManager::InsertBlock).
+class ASTBlockEncoded {
+public:
+ explicit ASTBlockEncoded(u32 start, u32 end) : start{start}, end{end} {}
+ u32 start;
+ u32 end;
+};
+
+// Payload of a block that owns its contents as a NodeBlock, which is moved in
+// on construction.
+class ASTBlockDecoded {
+public:
+ explicit ASTBlockDecoded(NodeBlock&& new_nodes) : nodes(std::move(new_nodes)) {}
+ NodeBlock nodes;
+};
+
+// Payload of an assignment "V<index> := condition".
+class ASTVarSet {
+public:
+ explicit ASTVarSet(u32 index, Expr condition) : index{index}, condition{std::move(condition)} {}
+ u32 index;
+ Expr condition;
+};
+
+// Payload of a jump target. `unused` starts out false and is set through
+// ASTBase::MarkLabelUnused().
+class ASTLabel {
+public:
+ explicit ASTLabel(u32 index) : index{index} {}
+ u32 index;
+ bool unused{};
+};
+
+// Payload of a conditional jump; `label` is the index of the target label.
+class ASTGoto {
+public:
+ explicit ASTGoto(Expr condition, u32 label) : condition{std::move(condition)}, label{label} {}
+ Expr condition;
+ u32 label;
+};
+
+// Payload of a "do { nodes } while (condition)" loop.
+class ASTDoWhile {
+public:
+ explicit ASTDoWhile(Expr condition) : condition{std::move(condition)} {}
+ Expr condition;
+ ASTZipper nodes{};
+};
+
+// Payload of a conditional return. The printer renders `kills == true` as
+// "discard" and `kills == false` as "exit".
+class ASTReturn {
+public:
+ explicit ASTReturn(Expr condition, bool kills)
+ : condition{std::move(condition)}, kills{kills} {}
+ Expr condition;
+ bool kills;
+};
+
+// Payload of a conditional "break".
+class ASTBreak {
+public:
+ explicit ASTBreak(Expr condition) : condition{std::move(condition)} {}
+ Expr condition;
+};
+
+// A node of the structured control-flow tree. It carries a payload (one of the
+// ASTData alternatives) together with its parent, its siblings and the zipper
+// (sibling list) that currently manages it. The typed accessors return an
+// empty/neutral value when the payload is of a different type.
+class ASTBase {
+public:
+    explicit ASTBase(ASTNode parent, ASTData data)
+        : data{std::move(data)}, parent{std::move(parent)} {}
+
+    // Builds a node whose payload is a U constructed from `args`.
+    template <class U, class... Args>
+    static ASTNode Make(ASTNode parent, Args&&... args) {
+        ASTData payload{U(std::forward<Args>(args)...)};
+        return std::make_shared<ASTBase>(std::move(parent), std::move(payload));
+    }
+
+    void SetParent(ASTNode new_parent) {
+        parent = std::move(new_parent);
+    }
+
+    ASTNode& GetParent() {
+        return parent;
+    }
+
+    const ASTNode& GetParent() const {
+        return parent;
+    }
+
+    // Number of ancestors; a node without parent is at level 0.
+    u32 GetLevel() const {
+        u32 depth = 0;
+        for (const ASTBase* ancestor = parent.get(); ancestor != nullptr;
+             ancestor = ancestor->parent.get()) {
+            ++depth;
+        }
+        return depth;
+    }
+
+    ASTData* GetInnerData() {
+        return &data;
+    }
+
+    const ASTData* GetInnerData() const {
+        return &data;
+    }
+
+    ASTNode GetNext() const {
+        return next;
+    }
+
+    ASTNode GetPrevious() const {
+        return previous;
+    }
+
+    // The node must currently be linked into a zipper.
+    ASTZipper& GetManager() {
+        return *manager;
+    }
+
+    const ASTZipper& GetManager() const {
+        return *manager;
+    }
+
+    // Target label index, or nullopt when this node is not a goto.
+    std::optional<u32> GetGotoLabel() const {
+        if (const auto* goto_data = std::get_if<ASTGoto>(&data)) {
+            return goto_data->label;
+        }
+        return std::nullopt;
+    }
+
+    // Goto condition, or null when this node is not a goto.
+    Expr GetGotoCondition() const {
+        if (const auto* goto_data = std::get_if<ASTGoto>(&data)) {
+            return goto_data->condition;
+        }
+        return nullptr;
+    }
+
+    // No effect unless this node is a label.
+    void MarkLabelUnused() {
+        if (auto* label_data = std::get_if<ASTLabel>(&data)) {
+            label_data->unused = true;
+        }
+    }
+
+    // Nodes that are not labels report true.
+    bool IsLabelUnused() const {
+        const auto* label_data = std::get_if<ASTLabel>(&data);
+        return label_data == nullptr || label_data->unused;
+    }
+
+    // Label index, or nullopt when this node is not a label.
+    std::optional<u32> GetLabelIndex() const {
+        if (const auto* label_data = std::get_if<ASTLabel>(&data)) {
+            return label_data->index;
+        }
+        return std::nullopt;
+    }
+
+    // If-then condition, or null when this node is not an if-then.
+    Expr GetIfCondition() const {
+        if (const auto* if_data = std::get_if<ASTIfThen>(&data)) {
+            return if_data->condition;
+        }
+        return nullptr;
+    }
+
+    // No effect unless this node is a goto.
+    void SetGotoCondition(Expr new_condition) {
+        if (auto* goto_data = std::get_if<ASTGoto>(&data)) {
+            goto_data->condition = std::move(new_condition);
+        }
+    }
+
+    bool IsIfThen() const {
+        return std::get_if<ASTIfThen>(&data) != nullptr;
+    }
+
+    bool IsIfElse() const {
+        return std::get_if<ASTIfElse>(&data) != nullptr;
+    }
+
+    bool IsBlockEncoded() const {
+        return std::get_if<ASTBlockEncoded>(&data) != nullptr;
+    }
+
+    // Replaces the payload by a decoded block that takes over `nodes`.
+    void TransformBlockEncoded(NodeBlock&& nodes) {
+        data = ASTBlockDecoded(std::move(nodes));
+    }
+
+    bool IsLoop() const {
+        return std::get_if<ASTDoWhile>(&data) != nullptr;
+    }
+
+    // Child list of a program, if-then, if-else or do-while node; null for all
+    // other payload types.
+    ASTZipper* GetSubNodes() {
+        if (auto* program_data = std::get_if<ASTProgram>(&data)) {
+            return &program_data->nodes;
+        }
+        if (auto* if_data = std::get_if<ASTIfThen>(&data)) {
+            return &if_data->nodes;
+        }
+        if (auto* else_data = std::get_if<ASTIfElse>(&data)) {
+            return &else_data->nodes;
+        }
+        if (auto* loop_data = std::get_if<ASTDoWhile>(&data)) {
+            return &loop_data->nodes;
+        }
+        return nullptr;
+    }
+
+    // Drops every link this node holds to other nodes and to its zipper.
+    void Clear() {
+        manager = nullptr;
+        parent.reset();
+        previous.reset();
+        next.reset();
+    }
+
+private:
+    friend class ASTZipper;
+
+    ASTData data;
+    ASTNode parent{};
+    ASTNode next{};
+    ASTNode previous{};
+    ASTZipper* manager{};
+};
+
+// Builds the flat list of blocks, labels, gotos and returns of a shader program
+// and restructures it into if / do-while constructs (see Decompile()).
+// Typical order of use as far as this file shows: Init(), DeclareLabel() for
+// every jump target, the Insert*() calls in program order, then Decompile().
+class ASTManager final {
+public:
+ ASTManager(bool full_decompile, bool disable_else_derivation);
+ ~ASTManager();
+
+ ASTManager(const ASTManager& o) = delete;
+ ASTManager& operator=(const ASTManager& other) = delete;
+
+ // NOTE(review): the defaulted move assignment replaces main_node without going
+ // through Clear(), unlike the destructor - confirm that assigning over a
+ // manager that already holds a tree is never done.
+ ASTManager(ASTManager&& other) noexcept = default;
+ ASTManager& operator=(ASTManager&& other) noexcept = default;
+
+ // Creates the root program node; required before any Insert*() call.
+ void Init();
+
+ // Reserves a label index for `address` (idempotent per address).
+ void DeclareLabel(u32 address);
+
+ // Appends the label node for `address` to the program.
+ void InsertLabel(u32 address);
+
+ // Appends a conditional goto to the label of `address`.
+ void InsertGoto(Expr condition, u32 address);
+
+ // Appends an encoded block covering [start_address, end_address].
+ void InsertBlock(u32 start_address, u32 end_address);
+
+ // Appends a conditional return; `kills` is printed as "discard" vs "exit".
+ void InsertReturn(Expr condition, bool kills);
+
+ // Pseudo-code rendering of the current tree.
+ std::string Print() const;
+
+ // Replaces gotos by structured control flow where possible.
+ void Decompile();
+
+ // Logs Print() under the caption `state`, then runs SanityCheck().
+ void ShowCurrentState(std::string_view state) const;
+
+ // Logs an error for every label without a parent.
+ void SanityCheck() const;
+
+ // Dismantles the tree and forgets all labels and gotos.
+ void Clear();
+
+ // With full_decompile: true when no goto is left. Otherwise: true when no
+ // remaining goto is a backward jump (a non-goto entry in `gotos` yields false).
+ bool IsFullyDecompiled() const {
+ if (full_decompile) {
+ return gotos.empty();
+ }
+
+ for (ASTNode goto_node : gotos) {
+ auto label_index = goto_node->GetGotoLabel();
+ if (!label_index) {
+ return false;
+ }
+ ASTNode glabel = labels[*label_index];
+ if (IsBackwardsJump(goto_node, glabel)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // Root node of the tree (null before Init() and after Clear()).
+ ASTNode GetProgram() const {
+ return main_node;
+ }
+
+ // Number of variables handed out by NewVariable() so far.
+ u32 GetVariables() const {
+ return variables;
+ }
+
+ // Label nodes indexed by label index.
+ const std::vector<ASTNode>& GetLabels() const {
+ return labels;
+ }
+
+private:
+ // True when the label is located before the goto.
+ bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;
+
+ // True when the nodes are neither siblings nor directly related.
+ bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;
+
+ // True when lifting the deeper node to the other one's depth yields a sibling.
+ bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;
+
+ // Replaces a backward goto by a do-while loop.
+ void EncloseDoWhile(ASTNode goto_node, ASTNode label);
+
+ // Replaces a forward goto by an if-then (or else) block.
+ void EncloseIfThen(ASTNode goto_node, ASTNode label);
+
+ // Moves a goto out of the construct that contains it.
+ void MoveOutward(ASTNode goto_node);
+
+ // Returns a fresh variable index.
+ u32 NewVariable() {
+ return variables++;
+ }
+
+ bool full_decompile{};
+ bool disable_else_derivation{};
+ // address -> label index
+ std::unordered_map<u32, u32> labels_map{};
+ u32 labels_count{};
+ // label index -> label node (null until InsertLabel() is called for it)
+ std::vector<ASTNode> labels{};
+ // gotos that have not been replaced by structured control flow yet
+ std::list<ASTNode> gotos{};
+ u32 variables{};
+ // points into main_node's payload
+ ASTProgram* program{};
+ ASTNode main_node{};
+ // shared "false" expression used to initialise variables in MoveOutward()
+ Expr false_condition{};
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp
new file mode 100644
index 000000000..cddcbd4f0
--- /dev/null
+++ b/src/video_core/shader/compiler_settings.cpp
@@ -0,0 +1,26 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/shader/compiler_settings.h"
+
+namespace VideoCommon::Shader {
+
+// Maps a CompileDepth value to a human readable name; values outside the
+// enumerator list yield "Unknown Compiler Process".
+std::string CompileDepthAsString(const CompileDepth cd) {
+    const char* name = "Unknown Compiler Process";
+    switch (cd) {
+    case CompileDepth::BruteForce:
+        name = "Brute Force Compile";
+        break;
+    case CompileDepth::FlowStack:
+        name = "Simple Flow Stack Mode";
+        break;
+    case CompileDepth::NoFlowStack:
+        name = "Remove Flow Stack";
+        break;
+    case CompileDepth::DecompileBackwards:
+        name = "Decompile Backward Jumps";
+        break;
+    case CompileDepth::FullDecompile:
+        name = "Full Decompilation";
+        break;
+    default:
+        break;
+    }
+    return name;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h
new file mode 100644
index 000000000..916018c01
--- /dev/null
+++ b/src/video_core/shader/compiler_settings.h
@@ -0,0 +1,26 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/engines/shader_bytecode.h"
+
+namespace VideoCommon::Shader {
+
+// Available compilation strategies; CompileDepthAsString() provides a display
+// name for each value.
+enum class CompileDepth : u32 {
+ BruteForce = 0,
+ FlowStack = 1,
+ NoFlowStack = 2,
+ DecompileBackwards = 3,
+ FullDecompile = 4,
+};
+
+std::string CompileDepthAsString(CompileDepth cd);
+
+// Options for shader compilation. Defaults: NoFlowStack depth, else derivation
+// disabled.
+struct CompilerSettings {
+ CompileDepth depth{CompileDepth::NoFlowStack};
+ bool disable_else_derivation{true};
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
new file mode 100644
index 000000000..fe467608e
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -0,0 +1,110 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// Note: no '#pragma once' here. This is a translation unit, not a header, and compilers warn
+// about the pragma when it appears in the main file.
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <utility>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/const_buffer_locker.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Engines::SamplerDescriptor;
+
+// Builds a locker bound to a shader stage only. No engine is attached, so lookups can only be
+// answered from values that were inserted beforehand.
+ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
+ : stage{shader_stage} {}
+
+// Builds a locker bound to a shader stage and to the engine it reads missing values from.
+// Only the address of the engine is stored.
+ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine)
+ : stage{shader_stage}, engine{&engine} {}
+
+ConstBufferLocker::~ConstBufferLocker() = default;
+
+// Looks up the 32-bit const buffer value at (buffer, offset). A value already stored in the
+// locker wins; otherwise it is read from the engine and remembered. Without an engine and
+// without a stored value the result is empty.
+std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
+    const std::pair<u32, u32> location{buffer, offset};
+    if (const auto cached = keys.find(location); cached != keys.end()) {
+        return cached->second;
+    }
+    if (engine == nullptr) {
+        return std::nullopt;
+    }
+    const u32 fetched = engine->AccessConstBuffer32(stage, buffer, offset);
+    keys.emplace(location, fetched);
+    return fetched;
+}
+
+// Looks up the bound sampler descriptor registered under the given offset. A stored descriptor
+// wins; otherwise it is read from the engine and remembered. Without an engine and without a
+// stored descriptor the result is empty.
+std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
+    if (const auto cached = bound_samplers.find(offset); cached != bound_samplers.end()) {
+        return cached->second;
+    }
+    if (engine == nullptr) {
+        return std::nullopt;
+    }
+    const SamplerDescriptor fetched = engine->AccessBoundSampler(stage, offset);
+    bound_samplers.emplace(offset, fetched);
+    return fetched;
+}
+
+// Looks up the bindless sampler descriptor registered under (buffer, offset). A stored
+// descriptor wins; otherwise it is read from the engine and remembered. Without an engine and
+// without a stored descriptor the result is empty.
+std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
+    u32 buffer, u32 offset) {
+    const std::pair<u32, u32> location{buffer, offset};
+    if (const auto cached = bindless_samplers.find(location);
+        cached != bindless_samplers.end()) {
+        return cached->second;
+    }
+    if (engine == nullptr) {
+        return std::nullopt;
+    }
+    const SamplerDescriptor fetched = engine->AccessBindlessSampler(stage, buffer, offset);
+    bindless_samplers.emplace(location, fetched);
+    return fetched;
+}
+
+// Stores (or overwrites) the const buffer value recorded for (buffer, offset).
+void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
+    keys.insert_or_assign(std::pair<u32, u32>{buffer, offset}, value);
+}
+
+// Stores (or overwrites) the bound sampler descriptor recorded for the given offset.
+void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
+    const u32 key = offset;
+    bound_samplers.insert_or_assign(key, sampler);
+}
+
+// Stores (or overwrites) the bindless sampler descriptor recorded for (buffer, offset).
+void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
+    bindless_samplers.insert_or_assign(std::pair<u32, u32>{buffer, offset}, sampler);
+}
+
+// Re-reads every stored key and sampler from the engine and reports whether all of them still
+// match. Returns false when no engine is attached. Checks run in the order keys, bound
+// samplers, bindless samplers and stop at the first mismatch.
+bool ConstBufferLocker::IsConsistent() const {
+    if (engine == nullptr) {
+        return false;
+    }
+    for (const auto& entry : keys) {
+        const auto [cbuf, offset] = entry.first;
+        if (!(entry.second == engine->AccessConstBuffer32(stage, cbuf, offset))) {
+            return false;
+        }
+    }
+    for (const auto& entry : bound_samplers) {
+        if (!(entry.second == engine->AccessBoundSampler(stage, entry.first))) {
+            return false;
+        }
+    }
+    for (const auto& entry : bindless_samplers) {
+        const auto [cbuf, offset] = entry.first;
+        if (!(entry.second == engine->AccessBindlessSampler(stage, cbuf, offset))) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Compares the three stored maps (keys, bound samplers, bindless samplers) with those of rhs.
+bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
+    if (!(keys == rhs.keys)) {
+        return false;
+    }
+    if (!(bound_samplers == rhs.bound_samplers)) {
+        return false;
+    }
+    return bindless_samplers == rhs.bindless_samplers;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
new file mode 100644
index 000000000..600e2f3c3
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+// <optional> and <utility> are required directly: the declarations below use std::optional
+// and std::pair.
+#include <optional>
+#include <unordered_map>
+#include <utility>
+#include "common/common_types.h"
+#include "common/hash.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
+
+namespace VideoCommon::Shader {
+
+/// Const buffer values, keyed by a pair of u32 (used as buffer and offset by the locker).
+using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
+/// Bound sampler descriptors, keyed by a single u32 (used as offset by the locker).
+using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+/// Bindless sampler descriptors, keyed by a pair of u32 (used as buffer and offset by the locker).
+using BindlessSamplerMap =
+ std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
+
+/**
+ * The ConstBufferLocker is a class used to interface the 3D and compute engines with the shader
+ * compiler. With it, the shader can obtain required data from GPU state and store it for disk
+ * shader compilation.
+ **/
+class ConstBufferLocker {
+public:
+ /// Creates a locker for the given shader stage with no engine attached.
+ explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
+
+ /// Creates a locker for the given shader stage that reads missing values from the engine.
+ explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine);
+
+ ~ConstBufferLocker();
+
+ /// Retrieves a key from the locker. If it is registered, the registered value is returned;
+ /// if not, it is obtained from the attached engine and registered.
+ std::optional<u32> ObtainKey(u32 buffer, u32 offset);
+
+ /// Retrieves a bound sampler from the locker, obtaining and registering it if missing.
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
+
+ /// Retrieves a bindless sampler from the locker, obtaining and registering it if missing.
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
+
+ /// Inserts a key.
+ void InsertKey(u32 buffer, u32 offset, u32 value);
+
+ /// Inserts a bound sampler key.
+ void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Inserts a bindless sampler key.
+ void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Checks keys and samplers against the engine's current const buffers. Returns true if they
+ /// are the same value, false otherwise.
+ bool IsConsistent() const;
+
+ /// Returns true if the keys are equal to the other ones in the locker.
+ bool HasEqualKeys(const ConstBufferLocker& rhs) const;
+
+ /// Returns the const buffer keys stored in the database.
+ const KeyMap& GetKeys() const {
+ return keys;
+ }
+
+ /// Gets samplers database.
+ const BoundSamplerMap& GetBoundSamplers() const {
+ return bound_samplers;
+ }
+
+ /// Gets bindless samplers database.
+ const BindlessSamplerMap& GetBindlessSamplers() const {
+ return bindless_samplers;
+ }
+
+private:
+ /// Shader stage passed to every engine access.
+ const Tegra::Engines::ShaderType stage;
+ /// Engine used to fetch missing values; stays null when none was supplied.
+ Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
+ KeyMap keys;
+ BoundSamplerMap bound_samplers;
+ BindlessSamplerMap bindless_samplers;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index ec3a76690..b427ac873 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -4,18 +4,21 @@
#include <list>
#include <map>
+#include <set>
#include <stack>
#include <unordered_map>
-#include <unordered_set>
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
+#include "video_core/shader/ast.h"
#include "video_core/shader/control_flow.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
+
namespace {
+
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
@@ -34,14 +37,20 @@ struct BlockStack {
std::stack<u32> pbk_stack{};
};
-struct BlockBranchInfo {
- Condition condition{};
- s32 address{exit_branch};
- bool kill{};
- bool is_sync{};
- bool is_brk{};
- bool ignore{};
-};
+/// Constructs a T from the forwarded arguments, wraps it in a BranchData variant and returns it
+/// as a shared BlockBranchInfo.
+template <typename T, typename... Args>
+BlockBranchInfo MakeBranchInfo(Args&&... args) {
+    static_assert(std::is_convertible_v<T, BranchData>);
+    BranchData data = T(std::forward<Args>(args)...);
+    return std::make_shared<BranchData>(std::move(data));
+}
+
+/// Returns true only when the branch info holds a SingleBranch whose ignore flag is set.
+/// The pointer is taken by const reference so no reference count is touched, and a null
+/// pointer or a MultiBranch simply yields false.
+bool BlockBranchIsIgnored(const BlockBranchInfo& first) {
+    // std::get_if accepts a null pointer and returns nullptr for a non-matching alternative,
+    // so a single call covers both the type check and the access.
+    const auto* const branch = std::get_if<SingleBranch>(first.get());
+    return branch != nullptr && branch->ignore;
+}
struct BlockInfo {
u32 start{};
@@ -55,21 +64,21 @@ struct BlockInfo {
};
struct CFGRebuildState {
- explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
- const u32 start)
- : start{start}, program_code{program_code}, program_size{program_size} {}
+    /// Binds the state to the program being analysed, its start address and the locker used to
+    /// resolve const buffer reads. Initializers are listed in member declaration order
+    /// (program_code, locker, start), which is the order they actually run in.
+    explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
+        : program_code{program_code}, locker{locker}, start{start} {}
- u32 start{};
- std::vector<BlockInfo> block_info{};
- std::list<u32> inspect_queries{};
- std::list<Query> queries{};
- std::unordered_map<u32, u32> registered{};
- std::unordered_set<u32> labels{};
- std::map<u32, u32> ssy_labels{};
- std::map<u32, u32> pbk_labels{};
- std::unordered_map<u32, BlockStack> stacks{};
const ProgramCode& program_code;
- const std::size_t program_size;
+ ConstBufferLocker& locker;
+ u32 start{};
+ std::vector<BlockInfo> block_info;
+ std::list<u32> inspect_queries;
+ std::list<Query> queries;
+ std::unordered_map<u32, u32> registered;
+ std::set<u32> labels;
+ std::map<u32, u32> ssy_labels;
+ std::map<u32, u32> pbk_labels;
+ std::unordered_map<u32, BlockStack> stacks;
+ ASTManager* manager{};
};
enum class BlockCollision : u32 { None, Found, Inside };
@@ -102,7 +111,7 @@ BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
}
Pred GetPredicate(u32 index, bool negated) {
- return static_cast<Pred>(index + (negated ? 8 : 0));
+ return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
}
/**
@@ -122,10 +131,122 @@ enum class ParseResult : u32 {
AbnormalFlow,
};
+/// Result of tracking an indirect branch (BRX) back to the const buffer table it reads from.
+struct BranchIndirectInfo {
+ u32 buffer{}; ///< Const buffer index taken from the tracked LD_C.
+ u32 offset{}; ///< Const buffer offset taken from the tracked LD_C.
+ u32 entries{}; ///< Number of table entries: tracked IMNMX immediate plus one.
+ s32 relative_position{}; ///< Value of brx.GetBranchExtend() for the BRX instruction.
+};
+
+/// Const buffer location (index and offset) extracted from an LD_C instruction.
+/// Members are value-initialized, matching BranchIndirectInfo, so a default-constructed
+/// BufferInfo never carries indeterminate values.
+struct BufferInfo {
+    u32 index{};
+    u32 offset{};
+};
+
+/// Inspects the instruction at pos and, when it is a BRX that does not use a constant buffer,
+/// returns its branch extend together with the register in gpr8. On success pos is moved to
+/// the preceding instruction so tracking can continue backwards from there.
+/// Returns an empty optional when the instruction does not decode, is not such a BRX, or sits
+/// at position 0 (nothing precedes it, and stepping back would wrap the unsigned position).
+std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
+    const Instruction instr = state.program_code[pos];
+    const auto opcode = OpCode::Decode(instr);
+    // Decode may fail; never dereference an empty result.
+    if (!opcode || opcode->get().GetId() != OpCode::Id::BRX) {
+        return std::nullopt;
+    }
+    if (instr.brx.constant_buffer != 0) {
+        return std::nullopt;
+    }
+    if (pos == 0) {
+        return std::nullopt;
+    }
+    --pos;
+    return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
+}
+
+/// Walks backwards from pos down to state.start looking for the first decodable, non-scheduling
+/// instruction accepted by test. On a match, pos is moved to the preceding instruction and the
+/// value produced by pack is returned; otherwise an empty optional is returned.
+/// The walk never reads outside the program and never steps an unsigned position below zero,
+/// so it terminates even when state.start is 0.
+template <typename Result, typename TestCallable, typename PackCallable>
+// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
+// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
+std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
+                                       PackCallable pack) {
+    // A previous step may have wrapped pos around; anything past the program is "not found".
+    if (pos >= state.program_code.size()) {
+        return std::nullopt;
+    }
+    while (pos >= state.start) {
+        if (!IsSchedInstruction(pos, state.start)) {
+            const Instruction instr = state.program_code[pos];
+            const auto opcode = OpCode::Decode(instr);
+            if (opcode && test(instr, opcode->get())) {
+                // Unsigned wrap-around at position 0 is harmless here: the bounds check above
+                // rejects the wrapped value on the next call.
+                --pos;
+                return std::make_optional(pack(instr, opcode->get()));
+            }
+        }
+        if (pos == 0) {
+            // Only reachable when state.start is 0; there is nothing further to inspect.
+            break;
+        }
+        --pos;
+    }
+    return std::nullopt;
+}
+
+/// Tracks backwards for the LD_C (single uniform type) that writes brx_tracked_register.
+/// Returns the const buffer location it reads together with the register in its gpr8 field.
+std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
+                                                   u64 brx_tracked_register) {
+    using LdcResult = std::pair<BufferInfo, u64>;
+
+    const auto is_matching_ldc = [brx_tracked_register](auto instr, const auto& opcode) {
+        if (opcode.GetId() != OpCode::Id::LD_C) {
+            return false;
+        }
+        if (!(instr.gpr0.Value() == brx_tracked_register)) {
+            return false;
+        }
+        return instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
+    };
+
+    const auto pack_ldc = [](auto instr, const auto&) {
+        const auto cbuf_index = static_cast<u32>(instr.cbuf36.index.Value());
+        const auto cbuf_offset = static_cast<u32>(instr.cbuf36.GetOffset());
+        const BufferInfo info = {cbuf_index, cbuf_offset};
+        return std::make_pair(info, instr.gpr8.Value());
+    };
+
+    return TrackInstruction<LdcResult>(state, pos, is_matching_ldc, pack_ldc);
+}
+
+/// Tracks backwards for the SHL_IMM that writes ldc_tracked_register and returns the register
+/// in its gpr8 field.
+std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
+                                    u64 ldc_tracked_register) {
+    const auto is_matching_shl = [ldc_tracked_register](auto instr, const auto& opcode) {
+        if (opcode.GetId() != OpCode::Id::SHL_IMM) {
+            return false;
+        }
+        return instr.gpr0.Value() == ldc_tracked_register;
+    };
+    const auto pack_source_register = [](auto instr, const auto&) { return instr.gpr8.Value(); };
+
+    return TrackInstruction<u64>(state, pos, is_matching_shl, pack_source_register);
+}
+
+/// Tracks backwards for the IMNMX_IMM that writes shl_tracked_register and returns its signed
+/// 20-bit immediate plus one, converted to u32.
+std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
+                                   u64 shl_tracked_register) {
+    const auto is_matching_imnmx = [shl_tracked_register](auto instr, const auto& opcode) {
+        if (opcode.GetId() != OpCode::Id::IMNMX_IMM) {
+            return false;
+        }
+        return instr.gpr0.Value() == shl_tracked_register;
+    };
+    const auto pack_entry_count = [](auto instr, const auto&) {
+        return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+    };
+
+    return TrackInstruction<u32>(state, pos, is_matching_imnmx, pack_entry_count);
+}
+
+/// Reconstructs the jump table description behind the BRX at pos by tracking, in order, the
+/// BRX itself, the LD_C feeding it, the SHL_IMM feeding that load and the IMNMX_IMM feeding the
+/// shift. Any step that cannot be found makes the whole tracking fail with an empty optional.
+std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
+    const auto brx = GetBRXInfo(state, pos);
+    if (!brx.has_value()) {
+        return std::nullopt;
+    }
+
+    const auto ldc = TrackLDC(state, pos, brx->second);
+    if (!ldc.has_value()) {
+        return std::nullopt;
+    }
+
+    const auto shl_register = TrackSHLRegister(state, pos, ldc->second);
+    if (!shl_register.has_value()) {
+        return std::nullopt;
+    }
+
+    const auto entry_count = TrackIMNMXValue(state, pos, *shl_register);
+    if (!entry_count.has_value()) {
+        return std::nullopt;
+    }
+
+    const BufferInfo& table = ldc->first;
+    return BranchIndirectInfo{table.index, table.offset, *entry_count, brx->first};
+}
+
std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
u32 offset = static_cast<u32>(address);
- const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction));
+ const u32 end_address = static_cast<u32>(state.program_code.size());
ParseInfo parse_info{};
+ SingleBranch single_branch{};
const auto insert_label = [](CFGRebuildState& state, u32 address) {
const auto pair = state.labels.emplace(address);
@@ -138,13 +259,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
if (offset >= end_address) {
// ASSERT_OR_EXECUTE can't be used, as it ignores the break
ASSERT_MSG(false, "Shader passed the current limit!");
- parse_info.branch_info.address = exit_branch;
- parse_info.branch_info.ignore = false;
+
+ single_branch.address = exit_branch;
+ single_branch.ignore = false;
break;
}
if (state.registered.count(offset) != 0) {
- parse_info.branch_info.address = offset;
- parse_info.branch_info.ignore = true;
+ single_branch.address = offset;
+ single_branch.ignore = true;
break;
}
if (IsSchedInstruction(offset, state.start)) {
@@ -161,24 +283,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
switch (opcode->get().GetId()) {
case OpCode::Id::EXIT: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = exit_branch;
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.address = exit_branch;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
@@ -187,99 +311,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
return {ParseResult::AbnormalFlow, parse_info};
}
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
const u32 branch_offset = offset + instr.bra.GetBranchTarget();
if (branch_offset == 0) {
- parse_info.branch_info.address = exit_branch;
+ single_branch.address = exit_branch;
} else {
- parse_info.branch_info.address = branch_offset;
+ single_branch.address = branch_offset;
}
insert_label(state, branch_offset);
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::SYNC: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = unassigned_branch;
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = true;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.address = unassigned_branch;
+ single_branch.kill = false;
+ single_branch.is_sync = true;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::BRK: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = unassigned_branch;
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = true;
- parse_info.branch_info.ignore = false;
+ single_branch.address = unassigned_branch;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = true;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
case OpCode::Id::KIL: {
const auto pred_index = static_cast<u32>(instr.pred.pred_index);
- parse_info.branch_info.condition.predicate =
- GetPredicate(pred_index, instr.negate_pred != 0);
- if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+ single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+ if (single_branch.condition.predicate == Pred::NeverExecute) {
offset++;
continue;
}
const ConditionCode cc = instr.flow_condition_code;
- parse_info.branch_info.condition.cc = cc;
+ single_branch.condition.cc = cc;
if (cc == ConditionCode::F) {
offset++;
continue;
}
- parse_info.branch_info.address = exit_branch;
- parse_info.branch_info.kill = true;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
- parse_info.branch_info.ignore = false;
+ single_branch.address = exit_branch;
+ single_branch.kill = true;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
+ single_branch.ignore = false;
parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill,
+ single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
return {ParseResult::ControlCaught, parse_info};
}
@@ -296,7 +428,30 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
break;
}
case OpCode::Id::BRX: {
- return {ParseResult::AbnormalFlow, parse_info};
+ const auto tmp = TrackBranchIndirectInfo(state, offset);
+ if (!tmp) {
+ LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
+ return {ParseResult::AbnormalFlow, parse_info};
+ }
+
+ const auto result = *tmp;
+ const s32 pc_target = offset + result.relative_position;
+ std::vector<CaseBranch> branches;
+ for (u32 i = 0; i < result.entries; i++) {
+ auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+ if (!key) {
+ return {ParseResult::AbnormalFlow, parse_info};
+ }
+ u32 value = *key;
+ u32 target = static_cast<u32>((value >> 3) + pc_target);
+ insert_label(state, target);
+ branches.emplace_back(value, target);
+ }
+ parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<MultiBranch>(
+ static_cast<u32>(instr.gpr8.Value()), std::move(branches));
+
+ return {ParseResult::ControlCaught, parse_info};
}
default:
break;
@@ -304,10 +459,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
offset++;
}
- parse_info.branch_info.kill = false;
- parse_info.branch_info.is_sync = false;
- parse_info.branch_info.is_brk = false;
+ single_branch.kill = false;
+ single_branch.is_sync = false;
+ single_branch.is_brk = false;
parse_info.end_address = offset - 1;
+ parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+ single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
+ single_branch.is_brk, single_branch.ignore);
return {ParseResult::BlockEnd, parse_info};
}
@@ -331,9 +489,10 @@ bool TryInspectAddress(CFGRebuildState& state) {
BlockInfo& current_block = state.block_info[block_index];
current_block.end = address - 1;
new_block.branch = current_block.branch;
- BlockBranchInfo forward_branch{};
- forward_branch.address = address;
- forward_branch.ignore = true;
+ BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
+ const auto branch = std::get_if<SingleBranch>(forward_branch.get());
+ branch->address = address;
+ branch->ignore = true;
current_block.branch = forward_branch;
return true;
}
@@ -348,12 +507,15 @@ bool TryInspectAddress(CFGRebuildState& state) {
BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
block_info.branch = parse_info.branch_info;
- if (parse_info.branch_info.condition.IsUnconditional()) {
+ if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
+ const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
+ if (branch->condition.IsUnconditional()) {
+ return true;
+ }
+ const u32 fallthrough_address = parse_info.end_address + 1;
+ state.inspect_queries.push_front(fallthrough_address);
return true;
}
-
- const u32 fallthrough_address = parse_info.end_address + 1;
- state.inspect_queries.push_front(fallthrough_address);
return true;
}
@@ -391,91 +553,205 @@ bool TryQuery(CFGRebuildState& state) {
state.queries.pop_front();
gather_labels(q2.ssy_stack, state.ssy_labels, block);
gather_labels(q2.pbk_stack, state.pbk_labels, block);
- if (!block.branch.condition.IsUnconditional()) {
- q2.address = block.end + 1;
- state.queries.push_back(q2);
- }
+ if (std::holds_alternative<SingleBranch>(*block.branch)) {
+ const auto branch = std::get_if<SingleBranch>(block.branch.get());
+ if (!branch->condition.IsUnconditional()) {
+ q2.address = block.end + 1;
+ state.queries.push_back(q2);
+ }
- Query conditional_query{q2};
- if (block.branch.is_sync) {
- if (block.branch.address == unassigned_branch) {
- block.branch.address = conditional_query.ssy_stack.top();
+ Query conditional_query{q2};
+ if (branch->is_sync) {
+ if (branch->address == unassigned_branch) {
+ branch->address = conditional_query.ssy_stack.top();
+ }
+ conditional_query.ssy_stack.pop();
}
- conditional_query.ssy_stack.pop();
- }
- if (block.branch.is_brk) {
- if (block.branch.address == unassigned_branch) {
- block.branch.address = conditional_query.pbk_stack.top();
+ if (branch->is_brk) {
+ if (branch->address == unassigned_branch) {
+ branch->address = conditional_query.pbk_stack.top();
+ }
+ conditional_query.pbk_stack.pop();
}
- conditional_query.pbk_stack.pop();
+ conditional_query.address = branch->address;
+ state.queries.push_back(std::move(conditional_query));
+ return true;
+ }
+ const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+ for (const auto& branch_case : multi_branch->branches) {
+ Query conditional_query{q2};
+ conditional_query.address = branch_case.address;
+ state.queries.push_back(std::move(conditional_query));
}
- conditional_query.address = block.branch.address;
- state.queries.push_back(std::move(conditional_query));
return true;
}
+
} // Anonymous namespace
-std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
- std::size_t program_size, u32 start_address) {
- CFGRebuildState state{program_code, program_size, start_address};
+/// Emits the AST statements for a block's terminating branch.
+/// A SingleBranch with a negative address becomes a return (flagged as kill when requested),
+/// any other SingleBranch becomes a goto, and a MultiBranch becomes one goto per case guarded
+/// by a register comparison.
+void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
+    // Translates a branch condition into an expression: the condition code (unless it is T)
+    // and the predicate (unless unused) are combined with AND; with neither present the
+    // expression is the constant true.
+    const auto build_condition = [](const Condition& cond) -> Expr {
+        Expr cc_expr{};
+        if (cond.cc != ConditionCode::T) {
+            cc_expr = MakeExpr<ExprCondCode>(cond.cc);
+        }
+        if (cond.predicate == Pred::UnusedIndex) {
+            if (cc_expr) {
+                return cc_expr;
+            }
+            return MakeExpr<ExprBoolean>(true);
+        }
+        // Predicate values above 7 denote the negated form of predicate (value - 8).
+        u32 pred_index = static_cast<u32>(cond.predicate);
+        const bool negated = pred_index > 7;
+        if (negated) {
+            pred_index -= 8;
+        }
+        Expr pred_expr = MakeExpr<ExprPredicate>(pred_index);
+        if (negated) {
+            pred_expr = MakeExpr<ExprNot>(pred_expr);
+        }
+        if (cc_expr) {
+            return MakeExpr<ExprAnd>(pred_expr, cc_expr);
+        }
+        return pred_expr;
+    };
+
+    if (std::holds_alternative<SingleBranch>(*branch_info)) {
+        const auto single = std::get_if<SingleBranch>(branch_info.get());
+        if (single->address >= 0) {
+            mm.InsertGoto(build_condition(single->condition), single->address);
+            return;
+        }
+        mm.InsertReturn(build_condition(single->condition), single->kill);
+        return;
+    }
+
+    const auto multi = std::get_if<MultiBranch>(branch_info.get());
+    for (const auto& entry : multi->branches) {
+        mm.InsertGoto(MakeExpr<ExprGprEqual>(multi->gpr, entry.cmp_value), entry.address);
+    }
+}
+
+/// Feeds the rebuilt control flow graph into the AST manager referenced by state and runs its
+/// decompilation: labels are declared first, then every block is inserted in order together
+/// with its label (if any) and its terminating branch (unless the branch is ignored).
+void DecompileShader(CFGRebuildState& state) {
+    ASTManager& manager = *state.manager;
+    manager.Init();
+    for (const auto label : state.labels) {
+        manager.DeclareLabel(label);
+    }
+    for (auto& block : state.block_info) {
+        if (state.labels.find(block.start) != state.labels.end()) {
+            manager.InsertLabel(block.start);
+        }
+        if (BlockBranchIsIgnored(block.branch)) {
+            // An ignored branch is not emitted; the inserted range ends one past the block end.
+            const u32 extended_end = block.end + 1;
+            manager.InsertBlock(block.start, extended_end);
+            continue;
+        }
+        manager.InsertBlock(block.start, block.end);
+        InsertBranch(manager, block.branch);
+    }
+    manager.Decompile();
+}
+
+std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
+ const CompilerSettings& settings,
+ ConstBufferLocker& locker) {
+ auto result_out = std::make_unique<ShaderCharacteristics>();
+ if (settings.depth == CompileDepth::BruteForce) {
+ result_out->settings.depth = CompileDepth::BruteForce;
+ return result_out;
+ }
+ CFGRebuildState state{program_code, start_address, locker};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);
state.inspect_queries.push_back(state.start);
while (!state.inspect_queries.empty()) {
if (!TryInspectAddress(state)) {
- return {};
+ result_out->settings.depth = CompileDepth::BruteForce;
+ return result_out;
}
}
- // Decompile Stacks
- state.queries.push_back(Query{state.start, {}, {}});
- bool decompiled = true;
- while (!state.queries.empty()) {
- if (!TryQuery(state)) {
- decompiled = false;
- break;
+ bool use_flow_stack = true;
+
+ bool decompiled = false;
+
+ if (settings.depth != CompileDepth::FlowStack) {
+ // Decompile Stacks
+ state.queries.push_back(Query{state.start, {}, {}});
+ decompiled = true;
+ while (!state.queries.empty()) {
+ if (!TryQuery(state)) {
+ decompiled = false;
+ break;
+ }
}
}
+ use_flow_stack = !decompiled;
+
// Sort and organize results
std::sort(state.block_info.begin(), state.block_info.end(),
- [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; });
- ShaderCharacteristics result_out{};
- result_out.decompilable = decompiled;
- result_out.start = start_address;
- result_out.end = start_address;
- for (const auto& block : state.block_info) {
+ [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
+ if (decompiled && settings.depth != CompileDepth::NoFlowStack) {
+ ASTManager manager{settings.depth != CompileDepth::DecompileBackwards,
+ settings.disable_else_derivation};
+ state.manager = &manager;
+ DecompileShader(state);
+ decompiled = state.manager->IsFullyDecompiled();
+ if (!decompiled) {
+ if (settings.depth == CompileDepth::FullDecompile) {
+ LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:");
+ } else {
+ LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:");
+ }
+ state.manager->ShowCurrentState("Of Shader");
+ state.manager->Clear();
+ } else {
+ auto characteristics = std::make_unique<ShaderCharacteristics>();
+ characteristics->start = start_address;
+ characteristics->settings.depth = settings.depth;
+ characteristics->manager = std::move(manager);
+ characteristics->end = state.block_info.back().end + 1;
+ return characteristics;
+ }
+ }
+
+ result_out->start = start_address;
+ result_out->settings.depth =
+ use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
+ result_out->blocks.clear();
+ for (auto& block : state.block_info) {
ShaderBlock new_block{};
new_block.start = block.start;
new_block.end = block.end;
- new_block.ignore_branch = block.branch.ignore;
+ new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
if (!new_block.ignore_branch) {
- new_block.branch.cond = block.branch.condition;
- new_block.branch.kills = block.branch.kill;
- new_block.branch.address = block.branch.address;
+ new_block.branch = block.branch;
}
- result_out.end = std::max(result_out.end, block.end);
- result_out.blocks.push_back(new_block);
+ result_out->end = std::max(result_out->end, block.end);
+ result_out->blocks.push_back(new_block);
}
- if (result_out.decompilable) {
- result_out.labels = std::move(state.labels);
- return {std::move(result_out)};
+ if (!use_flow_stack) {
+ result_out->labels = std::move(state.labels);
+ return result_out;
}
- // If it's not decompilable, merge the unlabelled blocks together
- auto back = result_out.blocks.begin();
+ auto back = result_out->blocks.begin();
auto next = std::next(back);
- while (next != result_out.blocks.end()) {
+ while (next != result_out->blocks.end()) {
if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
back->end = next->end;
- next = result_out.blocks.erase(next);
+ next = result_out->blocks.erase(next);
continue;
}
back = next;
++next;
}
- return {std::move(result_out)};
+
+ return result_out;
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index b0a5e4f8c..5304998b9 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -6,9 +6,12 @@
#include <list>
#include <optional>
-#include <unordered_set>
+#include <set>
+#include <variant>
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/ast.h"
+#include "video_core/shader/compiler_settings.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -35,29 +38,61 @@ struct Condition {
}
};
-struct ShaderBlock {
- struct Branch {
- Condition cond{};
- bool kills{};
- s32 address{};
+class SingleBranch { // Branch with one target, guarded by `condition`.
+public:
+ SingleBranch() = default;
+ SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk,
+ bool ignore)
+ : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk},
+ ignore{ignore} {}
+ // Two branches are equal only when every field matches.
+ bool operator==(const SingleBranch& b) const {
+ return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
+ std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
+ }
+
+ bool operator!=(const SingleBranch& b) const {
+ return !operator==(b);
+ }
+ // ShaderIR::InsertControlFlow emits Discard/Exit for a negative address, Branch otherwise.
+ Condition condition{};
+ s32 address{exit_branch};
+ bool kill{}; // With a negative address: emit Discard instead of Exit.
+ bool is_sync{}; // NOTE(review): presumably set for SYNC-originated branches - confirm in control_flow.cpp.
+ bool is_brk{}; // NOTE(review): presumably set for BRK-originated branches - confirm in control_flow.cpp.
+ bool ignore{};
+};
- bool operator==(const Branch& b) const {
- return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
- }
+struct CaseBranch { // One case of a MultiBranch.
+ CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {}
+ u32 cmp_value; // Value the MultiBranch register is compared against (equality).
+ u32 address; // Branch target taken when the comparison succeeds.
+};
+
+class MultiBranch { // Multi-way branch: one conditional jump per case, selected by a register value.
+public:
+ MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches)
+ : gpr{gpr}, branches{std::move(branches)} {}
+ // ShaderIR::InsertControlFlow emits, per case, a Branch guarded by `gpr == cmp_value`.
+ u32 gpr{}; // Index of the register whose value selects the case.
+ std::vector<CaseBranch> branches{};
+};
+
+using BranchData = std::variant<SingleBranch, MultiBranch>;
+using BlockBranchInfo = std::shared_ptr<BranchData>;
- bool operator!=(const Branch& b) const {
- return !operator==(b);
- }
- };
+bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
+struct ShaderBlock {
u32 start{};
u32 end{};
bool ignore_branch{};
- Branch branch{};
+ BlockBranchInfo branch{};
bool operator==(const ShaderBlock& sb) const {
- return std::tie(start, end, ignore_branch, branch) ==
- std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
+ return std::tie(start, end, ignore_branch) ==
+ std::tie(sb.start, sb.end, sb.ignore_branch) &&
+ BlockBranchInfoAreEqual(branch, sb.branch);
}
bool operator!=(const ShaderBlock& sb) const {
@@ -67,13 +102,15 @@ struct ShaderBlock {
struct ShaderCharacteristics {
std::list<ShaderBlock> blocks{};
- bool decompilable{};
+ std::set<u32> labels{}; // Block start addresses at which ShaderIR::Decode opens a new basic block.
u32 start{};
u32 end{};
- std::unordered_set<u32> labels{};
+ ASTManager manager{true, true}; // Moved into ShaderIR::program_manager on the decompile paths.
+ CompilerSettings settings{}; // settings.depth is the compile depth ScanFlow actually reached.
};
-std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
- std::size_t program_size, u32 start_address);
+std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
+ const CompilerSettings& settings,
+ ConstBufferLocker& locker);
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 47a9fd961..22c3e5120 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -33,60 +33,140 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
return (absolute_offset % SchedPeriod) == 0;
}
-} // namespace
+} // Anonymous namespace
+
+/// Visitor over a decompiled AST: recurses through container nodes and replaces every node that
+/// still holds an encoded block with the IR decoded from that block's [start, end] range.
+class ASTDecoder {
+public:
+    explicit ASTDecoder(ShaderIR& ir) : ir(ir) {}
+
+    // Container nodes: only their children need to be visited.
+    void operator()(ASTProgram& ast) {
+        VisitChildren(ast.nodes.GetFirst());
+    }
+
+    void operator()(ASTIfThen& ast) {
+        VisitChildren(ast.nodes.GetFirst());
+    }
+
+    void operator()(ASTIfElse& ast) {
+        VisitChildren(ast.nodes.GetFirst());
+    }
+
+    void operator()(ASTDoWhile& ast) {
+        VisitChildren(ast.nodes.GetFirst());
+    }
+
+    // Leaf nodes: nothing to recurse into. Encoded blocks are transformed by Visit() itself.
+    void operator()(ASTBlockEncoded&) {}
+
+    void operator()(ASTBlockDecoded&) {}
+
+    void operator()(ASTVarSet&) {}
+
+    void operator()(ASTLabel&) {}
+
+    void operator()(ASTGoto&) {}
+
+    void operator()(ASTReturn&) {}
+
+    void operator()(ASTBreak&) {}
+
+    /// Dispatches on the node's inner variant, then decodes the node in place if it is still an
+    /// encoded block.
+    void Visit(ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+        if (node->IsBlockEncoded()) {
+            const auto* block = std::get_if<ASTBlockEncoded>(node->GetInnerData());
+            NodeBlock bb = ir.DecodeRange(block->start, block->end);
+            node->TransformBlockEncoded(std::move(bb));
+        }
+    }
+
+private:
+    /// Visits `current` and each following sibling, in list order.
+    void VisitChildren(ASTNode current) {
+        while (current) {
+            Visit(current);
+            current = current->GetNext();
+        }
+    }
+
+    ShaderIR& ir;
+};
void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
- disable_flow_stack = false;
- const auto info = ScanFlow(program_code, program_size, main_offset);
- if (info) {
- const auto& shader_info = *info;
- coverage_begin = shader_info.start;
- coverage_end = shader_info.end;
- if (shader_info.decompilable) {
- disable_flow_stack = true;
- const auto insert_block = [this](NodeBlock& nodes, u32 label) {
- if (label == static_cast<u32>(exit_branch)) {
- return;
- }
- basic_blocks.insert({label, nodes});
- };
- const auto& blocks = shader_info.blocks;
- NodeBlock current_block;
- u32 current_label = static_cast<u32>(exit_branch);
- for (auto& block : blocks) {
- if (shader_info.labels.count(block.start) != 0) {
- insert_block(current_block, current_label);
- current_block.clear();
- current_label = block.start;
- }
- if (!block.ignore_branch) {
- DecodeRangeInner(current_block, block.start, block.end);
- InsertControlFlow(current_block, block);
- } else {
- DecodeRangeInner(current_block, block.start, block.end + 1);
- }
- }
- insert_block(current_block, current_label);
- return;
- }
- LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");
- // we can't decompile it, fallback to standard method
+ decompiled = false; // Decode(): run ScanFlow, then build the IR for the compile depth it reports.
+ auto info = ScanFlow(program_code, main_offset, settings, locker); // info->settings.depth may differ from settings.depth.
+ auto& shader_info = *info;
+ coverage_begin = shader_info.start;
+ coverage_end = shader_info.end;
+ switch (shader_info.settings.depth) {
+ case CompileDepth::FlowStack: { // One basic block per scanned block, keyed by its start address.
for (const auto& block : shader_info.blocks) {
basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
}
- return;
+ break;
}
- LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");
-
- // Now we need to deal with an undecompilable shader. We need to brute force
- // a shader that captures every position.
- coverage_begin = main_offset;
- const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
- coverage_end = shader_end;
- for (u32 label = main_offset; label < shader_end; label++) {
- basic_blocks.insert({label, DecodeRange(label, label + 1)});
+ case CompileDepth::NoFlowStack: { // Accumulate blocks until the next label; branches become explicit IR.
+ disable_flow_stack = true;
+ const auto insert_block = [this](NodeBlock& nodes, u32 label) {
+ if (label == static_cast<u32>(exit_branch)) {
+ return;
+ }
+ basic_blocks.insert({label, nodes});
+ };
+ const auto& blocks = shader_info.blocks;
+ NodeBlock current_block;
+ u32 current_label = static_cast<u32>(exit_branch);
+ for (auto& block : blocks) {
+ if (shader_info.labels.count(block.start) != 0) {
+ insert_block(current_block, current_label);
+ current_block.clear();
+ current_label = block.start;
+ }
+ if (!block.ignore_branch) {
+ DecodeRangeInner(current_block, block.start, block.end);
+ InsertControlFlow(current_block, block);
+ } else {
+ DecodeRangeInner(current_block, block.start, block.end + 1);
+ }
+ }
+ insert_block(current_block, current_label);
+ break;
+ }
+ case CompileDepth::DecompileBackwards:
+ case CompileDepth::FullDecompile: { // Take over ScanFlow's AST and decode its encoded blocks.
+ program_manager = std::move(shader_info.manager);
+ disable_flow_stack = true;
+ decompiled = true;
+ ASTDecoder decoder{*this};
+ ASTNode program = GetASTProgram();
+ decoder.Visit(program);
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
+ [[fallthrough]];
+ case CompileDepth::BruteForce: { // One basic block per program_code entry from main_offset onward.
+ const auto shader_end = static_cast<u32>(program_code.size());
+ coverage_begin = main_offset;
+ coverage_end = shader_end;
+ for (u32 label = main_offset; label < shader_end; ++label) {
+ basic_blocks.insert({label, DecodeRange(label, label + 1)});
+ }
+ break;
+ }
+ }
+ if (settings.depth != shader_info.settings.depth) { // The requested depth was not reached: report the downgrade.
+ LOG_WARNING(
+ HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
+ CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
}
}
@@ -118,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
}
return result;
};
- if (block.branch.address < 0) {
- if (block.branch.kills) {
- Node n = Operation(OperationCode::Discard);
- n = apply_conditions(block.branch.cond, n);
+ if (std::holds_alternative<SingleBranch>(*block.branch)) {
+ auto branch = std::get_if<SingleBranch>(block.branch.get());
+ if (branch->address < 0) {
+ if (branch->kill) {
+ Node n = Operation(OperationCode::Discard);
+ n = apply_conditions(branch->condition, n);
+ bb.push_back(n);
+ global_code.push_back(n);
+ return;
+ }
+ Node n = Operation(OperationCode::Exit);
+ n = apply_conditions(branch->condition, n);
bb.push_back(n);
global_code.push_back(n);
return;
}
- Node n = Operation(OperationCode::Exit);
- n = apply_conditions(block.branch.cond, n);
+ Node n = Operation(OperationCode::Branch, Immediate(branch->address));
+ n = apply_conditions(branch->condition, n);
bb.push_back(n);
global_code.push_back(n);
return;
}
- Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
- n = apply_conditions(block.branch.cond, n);
- bb.push_back(n);
- global_code.push_back(n);
+ auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+ Node op_a = GetRegister(multi_branch->gpr);
+ for (auto& branch_case : multi_branch->branches) {
+ Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
+ Node op_b = Immediate(branch_case.cmp_value);
+ Node condition =
+ GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
+ auto result = Conditional(condition, {n});
+ bb.push_back(result);
+ global_code.push_back(result);
+ }
}
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 1473c282a..fcedd2af6 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -43,12 +43,12 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
case OpCode::Id::FMUL_IMM: {
// FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
if (instr.fmul.tab5cb8_2 != 0) {
- LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
- instr.fmul.tab5cb8_2.Value());
+ LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
+ instr.fmul.tab5cb8_2.Value());
}
if (instr.fmul.tab5c68_0 != 1) {
- LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
- instr.fmul.tab5c68_0.Value());
+ LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
+ instr.fmul.tab5c68_0.Value());
}
op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
@@ -144,10 +144,11 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
case OpCode::Id::RRO_C:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {
+ LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
+
// Currently RRO is only implemented as a register move.
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
SetRegister(bb, instr.gpr0, op_b);
- LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
break;
}
default:
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index b06cbe441..ee7d9a29d 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -21,8 +21,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
opcode->get().GetId() == OpCode::Id::HADD2_R) {
- if (instr.alu_half.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
}
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 6466fc011..d179b9873 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -19,12 +19,12 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
- if (instr.alu_half_imm.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half_imm.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
} else {
- if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
}
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index b73f6536e..a33d242e9 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -144,7 +144,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
case OpCode::Id::ICMP_IMM: {
const Node zero = Immediate(0);
- const auto [op_b, test] = [&]() -> std::pair<Node, Node> {
+ const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::ICMP_CR:
return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
@@ -161,10 +161,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
return {zero, zero};
}
}();
- const Node op_a = GetRegister(instr.gpr8);
+ const Node op_lhs = GetRegister(instr.gpr8);
const Node comparison =
GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
- SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b));
+ SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
break;
}
case OpCode::Id::LOP_C:
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index ca2f39e8d..5973588d6 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -19,10 +19,10 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
if (instr.ffma.tab5980_0 != 1) {
- LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
+ LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
}
if (instr.ffma.tab5980_1 != 0) {
- LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
+ LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
}
const Node op_a = GetRegister(instr.gpr8);
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 48ca7a4af..848e46874 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -20,8 +20,8 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- if (instr.hset2.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.hset2.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index 840694527..310655619 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
@@ -18,7 +19,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- DEBUG_ASSERT(instr.hsetp2.ftz == 0);
+ if (instr.hsetp2.ftz != 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ }
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
@@ -32,6 +35,8 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
h_and = instr.hsetp2.cbuf_and_imm.h_and;
op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
+ // F32 is hardcoded in hardware
+ op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32);
break;
case OpCode::Id::HSETP2_IMM:
cond = instr.hsetp2.cbuf_and_imm.cond;
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 95ec1cdd9..d2fe4ec5d 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -143,39 +143,37 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
}
Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
- const auto offset{static_cast<std::size_t>(image.index.Value())};
- if (const auto image = TryUseExistingImage(offset, type)) {
- return *image;
+ const auto offset = static_cast<u32>(image.index.Value());
+ // Reuse the entry already registered at this offset; it must be a bound image of the requested type.
+ const auto it =
+ std::find_if(std::begin(used_images), std::end(used_images),
+ [offset](const Image& entry) { return entry.GetOffset() == offset; });
+ if (it != std::end(used_images)) {
+ ASSERT(!it->IsBindless() && it->GetType() == type);
+ return *it;
}
- const std::size_t next_index{used_images.size()};
- return used_images.emplace(offset, Image{offset, next_index, type}).first->second;
+ const auto next_index = static_cast<u32>(used_images.size()); // Otherwise register a new entry at the next free index.
+ return used_images.emplace_back(next_index, offset, type);
}
Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
- const Node image_register{GetRegister(reg)};
- const auto [base_image, cbuf_index, cbuf_offset]{
- TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
- const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
-
- if (const auto image = TryUseExistingImage(cbuf_key, type)) {
- return *image;
- }
-
- const std::size_t next_index{used_images.size()};
- return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type})
- .first->second;
-}
-
-Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type) {
- auto it = used_images.find(offset);
- if (it == used_images.end()) {
- return nullptr;
+ const Node image_register = GetRegister(reg);
+ const auto [base_image, buffer, offset] =
+ TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
+ // Reuse the entry registered for this (buffer, offset) pair; it must be bindless and of the requested type.
+ const auto it =
+ std::find_if(std::begin(used_images), std::end(used_images),
+ [buffer = buffer, offset = offset](const Image& entry) {
+ return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+ });
+ if (it != std::end(used_images)) {
+ ASSERT(it->IsBindless() && it->GetType() == type);
+ return *it;
}
- auto& image = it->second;
- ASSERT(image.GetType() == type);
- return &image;
+ const auto next_index = static_cast<u32>(used_images.size()); // Otherwise register a new entry at the next free index.
+ return used_images.emplace_back(next_index, offset, buffer, type);
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 7923d4d69..335d78146 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -166,9 +166,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}();
const auto [real_address_base, base_address, descriptor] =
- TrackAndGetGlobalMemory(bb, instr, false);
+ TrackGlobalMemory(bb, instr, false);
const u32 count = GetUniformTypeElementsCount(type);
+ if (!real_address_base || !base_address) {
+ // Tracking failed, load zeroes.
+ for (u32 i = 0; i < count; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
+ }
+ break;
+ }
+
for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
const Node real_address =
@@ -260,22 +268,19 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}();
const auto [real_address_base, base_address, descriptor] =
- TrackAndGetGlobalMemory(bb, instr, true);
-
- // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
- SetTemporary(bb, 0, real_address_base);
+ TrackGlobalMemory(bb, instr, true);
+ if (!real_address_base || !base_address) {
+ // Tracking failed, skip the store.
+ break;
+ }
const u32 count = GetUniformTypeElementsCount(type);
for (u32 i = 0; i < count; ++i) {
- SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
- }
- for (u32 i = 0; i < count; ++i) {
const Node it_offset = Immediate(i * 4);
- const Node real_address =
- Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
+ const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-
- bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
+ const Node value = GetRegister(instr.gpr0.Value() + i);
+ bb.push_back(Operation(OperationCode::Assign, gmem, value));
}
break;
}
@@ -301,15 +306,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
return pc;
}
-std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
- Instruction instr,
- bool is_write) {
+std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
+ Instruction instr,
+ bool is_write) {
const auto addr_register{GetRegister(instr.gmem.gpr)};
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
const auto [base_address, index, offset] =
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
- ASSERT(base_address != nullptr);
+ ASSERT_OR_EXECUTE_MSG(base_address != nullptr,
+ { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
+ "Global memory tracking failed");
bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d46e0f823..17cd45d3c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -67,7 +67,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::MOV_SYS: {
- const Node value = [&]() {
+ const Node value = [this, instr] {
switch (instr.sys20) {
case SystemVariable::Ydirection:
return Operation(OperationCode::YNegate);
@@ -256,7 +256,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::DEPBAR: {
- LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
+ LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
break;
}
default:
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index f6ee68a54..d419e9c45 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -18,7 +18,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
const auto opcode = OpCode::Decode(instr);
Node op_a = GetRegister(instr.gpr8);
- Node op_b = [&]() {
+ Node op_b = [this, instr] {
if (instr.is_b_imm) {
return Immediate(instr.alu.GetSignedImm20_20());
} else if (instr.is_b_gpr) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0b934a069..bb926a132 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -44,10 +44,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
bool is_bindless = false;
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
- if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
- }
-
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -62,10 +58,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
- }
-
const TextureType texture_type{instr.tex_b.texture_type};
const bool is_array = instr.tex_b.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -82,10 +74,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.texs.GetTextureProcessMode();
- if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
- }
-
const Node4 components =
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
@@ -96,6 +84,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
break;
}
+ case OpCode::Id::TLD4_B: {
+ is_bindless = true;
+ [[fallthrough]];
+ }
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
@@ -103,24 +95,20 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
"PTP is not implemented");
- if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
- }
-
const auto texture_type = instr.tld4.texture_type.Value();
- const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
+ const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
+ : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
- const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
+ const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
+ : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
WriteTexInstructionFloat(
- bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
+ bb, instr,
+ GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless));
break;
}
case OpCode::Id::TLD4S: {
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
- }
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
@@ -141,7 +129,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
const auto& sampler =
- GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
+ GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@@ -150,25 +138,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
- WriteTexsInstructionFloat(bb, instr, values);
+ WriteTexsInstructionFloat(bb, instr, values, true);
break;
}
case OpCode::Id::TXQ_B:
is_bindless = true;
[[fallthrough]];
case OpCode::Id::TXQ: {
- if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
- }
-
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
- is_bindless
- ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
- false)
- : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+ is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {});
u32 indexer = 0;
switch (instr.txq.query_type) {
@@ -201,15 +182,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
- if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
- }
-
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
- const auto& sampler = is_bindless
- ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false)
- : GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto& sampler =
+ is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}})
+ : GetSampler(instr.sampler, {{texture_type, is_array, false}});
std::vector<Node> coords;
@@ -250,25 +227,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
- if (instr.tld.nodep_flag) {
- LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
- }
-
WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
break;
}
case OpCode::Id::TLDS: {
- const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
+ const TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
- if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
- }
-
const Node4 components = GetTldsCode(instr, texture_type, is_array);
if (instr.tlds.fp32_flag) {
@@ -285,48 +254,84 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
return pc;
}
-const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
- bool is_array, bool is_shadow) {
- const auto offset = static_cast<std::size_t>(sampler.index.Value());
+const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
+ std::optional<SamplerInfo> sampler_info) {
+ const auto offset = static_cast<u32>(sampler.index.Value());
+ // Sampler properties come from the caller if given, else from the locker's bound sampler, else a 2D default.
+ TextureType type;
+ bool is_array;
+ bool is_shadow;
+ if (sampler_info) {
+ type = sampler_info->type;
+ is_array = sampler_info->is_array;
+ is_shadow = sampler_info->is_shadow;
+ } else if (const auto sampler = locker.ObtainBoundSampler(offset)) { // NOTE(review): this local shadows the `sampler` parameter.
+ type = sampler->texture_type.Value();
+ is_array = sampler->is_array.Value() != 0;
+ is_shadow = sampler->is_shadow.Value() != 0;
+ } else {
+ LOG_WARNING(HW_GPU, "Unknown sampler info");
+ type = TextureType::Texture2D;
+ is_array = false;
+ is_shadow = false;
+ }
// If this sampler has already been used, return the existing mapping.
- const auto itr =
+ const auto it =
std::find_if(used_samplers.begin(), used_samplers.end(),
- [&](const Sampler& entry) { return entry.GetOffset() == offset; });
- if (itr != used_samplers.end()) {
- ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
- itr->IsShadow() == is_shadow);
- return *itr;
+ [offset](const Sampler& entry) { return entry.GetOffset() == offset; });
+ if (it != used_samplers.end()) {
+ ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+ it->IsShadow() == is_shadow);
+ return *it;
}
// Otherwise create a new mapping for this sampler
- const std::size_t next_index = used_samplers.size();
- const Sampler entry{offset, next_index, type, is_array, is_shadow};
- return *used_samplers.emplace(entry).first;
+ const auto next_index = static_cast<u32>(used_samplers.size());
+ return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow));
}
-const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
- bool is_array, bool is_shadow) {
+const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
+ std::optional<SamplerInfo> sampler_info) {
const Node sampler_register = GetRegister(reg);
- const auto [base_sampler, cbuf_index, cbuf_offset] =
+ const auto [base_sampler, buffer, offset] =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
ASSERT(base_sampler != nullptr);
- const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
+
+ TextureType type;
+ bool is_array;
+ bool is_shadow;
+ if (sampler_info) {
+ type = sampler_info->type;
+ is_array = sampler_info->is_array;
+ is_shadow = sampler_info->is_shadow;
+ } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) {
+ type = sampler->texture_type.Value();
+ is_array = sampler->is_array.Value() != 0;
+ is_shadow = sampler->is_shadow.Value() != 0;
+ } else {
+ LOG_WARNING(HW_GPU, "Unknown sampler info");
+ type = TextureType::Texture2D;
+ is_array = false;
+ is_shadow = false;
+ }
// If this sampler has already been used, return the existing mapping.
- const auto itr =
+ const auto it =
std::find_if(used_samplers.begin(), used_samplers.end(),
- [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; });
- if (itr != used_samplers.end()) {
- ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
- itr->IsShadow() == is_shadow);
- return *itr;
+ [buffer = buffer, offset = offset](const Sampler& entry) {
+ return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+ });
+ if (it != used_samplers.end()) {
+ ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+ it->IsShadow() == is_shadow);
+ return *it;
}
// Otherwise create a new mapping for this sampler
- const std::size_t next_index = used_samplers.size();
- const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow};
- return *used_samplers.emplace(entry).first;
+ const auto next_index = static_cast<u32>(used_samplers.size());
+ return used_samplers.emplace_back(
+ Sampler(next_index, offset, buffer, type, is_array, is_shadow));
}
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -344,14 +349,14 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
}
}
-void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
- const Node4& components) {
+void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
+ bool ignore_mask) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
u32 dest_elem = 0;
for (u32 component = 0; component < 4; ++component) {
- if (!instr.texs.IsComponentEnabled(component))
+ if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
continue;
SetTemporary(bb, dest_elem++, components[component]);
}
@@ -411,9 +416,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
(texture_type == TextureType::TextureCube && is_array && is_shadow),
"This method is not supported.");
- const auto& sampler = is_bindless
- ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow)
- : GetSampler(instr.sampler, texture_type, is_array, is_shadow);
+ const auto& sampler =
+ is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}})
+ : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}});
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
@@ -553,7 +558,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
- bool is_array, bool is_aoffi) {
+ bool is_array, bool is_aoffi, bool is_bindless) {
const std::size_t coord_count = GetCoordCount(texture_type);
// If enabled arrays index is always stored in the gpr8 field
@@ -567,6 +572,12 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
}
u64 parameter_register = instr.gpr20.Value();
+
+ const auto& sampler =
+ is_bindless
+ ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}})
+ : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
+
std::vector<Node> aoffi;
if (is_aoffi) {
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
@@ -577,12 +588,14 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
dc = GetRegister(parameter_register++);
}
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+ const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
+ : Immediate(static_cast<u32>(instr.tld4.component));
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
+ MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component,
+ element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -610,7 +623,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
// const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
// const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
@@ -646,7 +659,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
// When lod is used always is in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 97fc6f9b1..b047cf870 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
const Node op_a =
GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
instr.video.type_a, instr.video.byte_height_a);
- const Node op_b = [&]() {
+ const Node op_b = [this, instr] {
if (instr.video.use_register_b) {
return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
instr.video.signed_b, instr.video.type_b,
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index a8e481b3c..d98d0e1dd 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation;
using Tegra::Shader::VoteOperation;
namespace {
+
OperationCode GetOperationCode(VoteOperation vote_op) {
switch (vote_op) {
case VoteOperation::All:
@@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) {
return OperationCode::VoteAll;
}
}
+
} // Anonymous namespace
u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
@@ -48,47 +50,57 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
case OpCode::Id::SHFL: {
Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
: GetRegister(instr.gpr39);
- Node width = [&] {
- // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
- // been done reversing Nvidia's math. It won't work on all cases due to SHFL having
- // different parameters that don't properly map to GLSL's interface, but it should work
- // for cases emitted by Nvidia's compiler.
- if (instr.shfl.operation == ShuffleOperation::Up) {
- return Operation(
- OperationCode::ILogicalShiftRight,
- Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
- Immediate(8));
- } else {
- return Operation(OperationCode::ILogicalShiftRight,
- Operation(OperationCode::IAdd, Immediate(0x201F),
- Operation(OperationCode::INegate, std::move(mask))),
- Immediate(8));
- }
- }();
+ Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
+ : GetRegister(instr.gpr20);
+
+ Node thread_id = Operation(OperationCode::ThreadId);
+ Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
+ Node seg_mask = BitfieldExtract(mask, 8, 16);
- const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
+ Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
+ Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
+ Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
+ Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
+
+ Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
switch (instr.shfl.operation) {
case ShuffleOperation::Idx:
- return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
- case ShuffleOperation::Up:
- return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
+ return Operation(OperationCode::IBitwiseOr,
+ Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
+ min_thread_id);
case ShuffleOperation::Down:
- return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
+ return Operation(OperationCode::IAdd, thread_id, index);
+ case ShuffleOperation::Up:
+ return Operation(OperationCode::IAdd, thread_id,
+ Operation(OperationCode::INegate, index));
case ShuffleOperation::Bfly:
- return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
+ return Operation(OperationCode::IBitwiseXor, thread_id, index);
}
- UNREACHABLE_MSG("Invalid SHFL operation: {}",
- static_cast<u64>(instr.shfl.operation.Value()));
- return {};
+ UNREACHABLE();
+ return Immediate(0U);
}();
- // Setting the predicate before the register is intentional to avoid overwriting.
- Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
- : GetRegister(instr.gpr20);
- SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
+ Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
+ if (instr.shfl.operation == ShuffleOperation::Up) {
+ return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
+ } else {
+ return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
+ }
+ }();
+
+ SetPredicate(bb, instr.shfl.pred48, in_bounds);
SetRegister(
bb, instr.gpr0,
- Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
+ Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
+ break;
+ }
+ case OpCode::Id::FSWZADD: {
+ UNIMPLEMENTED_IF(instr.fswzadd.ndv);
+
+ Node op_a = GetRegister(instr.gpr8);
+ Node op_b = GetRegister(instr.gpr20);
+ Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
+ SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
break;
}
default:
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp
new file mode 100644
index 000000000..2647865d4
--- /dev/null
+++ b/src/video_core/shader/expr.cpp
@@ -0,0 +1,93 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <variant>
+
+#include "video_core/shader/expr.h"
+
+namespace VideoCommon::Shader {
+namespace {
+bool ExprIsBoolean(const Expr& expr) {
+ return std::holds_alternative<ExprBoolean>(*expr);
+}
+
+bool ExprBooleanGet(const Expr& expr) {
+ return std::get_if<ExprBoolean>(expr.get())->value;
+}
+} // Anonymous namespace
+
+bool ExprAnd::operator==(const ExprAnd& b) const {
+ return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
+}
+
+bool ExprAnd::operator!=(const ExprAnd& b) const {
+ return !operator==(b);
+}
+
+bool ExprOr::operator==(const ExprOr& b) const {
+ return (*operand1 == *b.operand1) && (*operand2 == *b.operand2);
+}
+
+bool ExprOr::operator!=(const ExprOr& b) const {
+ return !operator==(b);
+}
+
+bool ExprNot::operator==(const ExprNot& b) const {
+ return *operand1 == *b.operand1;
+}
+
+bool ExprNot::operator!=(const ExprNot& b) const {
+ return !operator==(b);
+}
+
+Expr MakeExprNot(Expr first) {
+ if (std::holds_alternative<ExprNot>(*first)) {
+ return std::get_if<ExprNot>(first.get())->operand1;
+ }
+ return MakeExpr<ExprNot>(std::move(first));
+}
+
+Expr MakeExprAnd(Expr first, Expr second) {
+ if (ExprIsBoolean(first)) {
+ return ExprBooleanGet(first) ? second : first;
+ }
+ if (ExprIsBoolean(second)) {
+ return ExprBooleanGet(second) ? first : second;
+ }
+ return MakeExpr<ExprAnd>(std::move(first), std::move(second));
+}
+
+Expr MakeExprOr(Expr first, Expr second) {
+ if (ExprIsBoolean(first)) {
+ return ExprBooleanGet(first) ? first : second;
+ }
+ if (ExprIsBoolean(second)) {
+ return ExprBooleanGet(second) ? second : first;
+ }
+ return MakeExpr<ExprOr>(std::move(first), std::move(second));
+}
+
+bool ExprAreEqual(const Expr& first, const Expr& second) {
+ return (*first) == (*second);
+}
+
+bool ExprAreOpposite(const Expr& first, const Expr& second) {
+ if (std::holds_alternative<ExprNot>(*first)) {
+ return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second);
+ }
+ if (std::holds_alternative<ExprNot>(*second)) {
+ return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first);
+ }
+ return false;
+}
+
+bool ExprIsTrue(const Expr& first) {
+ if (ExprIsBoolean(first)) {
+ return ExprBooleanGet(first);
+ }
+ return false;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
new file mode 100644
index 000000000..4e8264367
--- /dev/null
+++ b/src/video_core/shader/expr.h
@@ -0,0 +1,156 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <variant>
+
+#include "video_core/engines/shader_bytecode.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::ConditionCode;
+using Tegra::Shader::Pred;
+
+class ExprAnd;
+class ExprBoolean;
+class ExprCondCode;
+class ExprGprEqual;
+class ExprNot;
+class ExprOr;
+class ExprPredicate;
+class ExprVar;
+
+using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
+ ExprBoolean, ExprGprEqual>;
+using Expr = std::shared_ptr<ExprData>;
+
+class ExprAnd final {
+public:
+ explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
+
+ bool operator==(const ExprAnd& b) const;
+ bool operator!=(const ExprAnd& b) const;
+
+ Expr operand1;
+ Expr operand2;
+};
+
+class ExprOr final {
+public:
+ explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {}
+
+ bool operator==(const ExprOr& b) const;
+ bool operator!=(const ExprOr& b) const;
+
+ Expr operand1;
+ Expr operand2;
+};
+
+class ExprNot final {
+public:
+ explicit ExprNot(Expr a) : operand1{std::move(a)} {}
+
+ bool operator==(const ExprNot& b) const;
+ bool operator!=(const ExprNot& b) const;
+
+ Expr operand1;
+};
+
+class ExprVar final {
+public:
+ explicit ExprVar(u32 index) : var_index{index} {}
+
+ bool operator==(const ExprVar& b) const {
+ return var_index == b.var_index;
+ }
+
+ bool operator!=(const ExprVar& b) const {
+ return !operator==(b);
+ }
+
+ u32 var_index;
+};
+
+class ExprPredicate final {
+public:
+ explicit ExprPredicate(u32 predicate) : predicate{predicate} {}
+
+ bool operator==(const ExprPredicate& b) const {
+ return predicate == b.predicate;
+ }
+
+ bool operator!=(const ExprPredicate& b) const {
+ return !operator==(b);
+ }
+
+ u32 predicate;
+};
+
+class ExprCondCode final {
+public:
+ explicit ExprCondCode(ConditionCode cc) : cc{cc} {}
+
+ bool operator==(const ExprCondCode& b) const {
+ return cc == b.cc;
+ }
+
+ bool operator!=(const ExprCondCode& b) const {
+ return !operator==(b);
+ }
+
+ ConditionCode cc;
+};
+
+class ExprBoolean final {
+public:
+ explicit ExprBoolean(bool val) : value{val} {}
+
+ bool operator==(const ExprBoolean& b) const {
+ return value == b.value;
+ }
+
+ bool operator!=(const ExprBoolean& b) const {
+ return !operator==(b);
+ }
+
+ bool value;
+};
+
+class ExprGprEqual final {
+public:
+ ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {}
+
+ bool operator==(const ExprGprEqual& b) const {
+ return gpr == b.gpr && value == b.value;
+ }
+
+ bool operator!=(const ExprGprEqual& b) const {
+ return !operator==(b);
+ }
+
+ u32 gpr;
+ u32 value;
+};
+
+template <typename T, typename... Args>
+Expr MakeExpr(Args&&... args) {
+ static_assert(std::is_convertible_v<T, ExprData>);
+ return std::make_shared<ExprData>(T(std::forward<Args>(args)...));
+}
+
+bool ExprAreEqual(const Expr& first, const Expr& second);
+
+bool ExprAreOpposite(const Expr& first, const Expr& second);
+
+Expr MakeExprNot(Expr first);
+
+Expr MakeExprAnd(Expr first, Expr second);
+
+Expr MakeExprOr(Expr first, Expr second);
+
+bool ExprIsTrue(const Expr& first);
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 338bab17c..54217e6a4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -47,6 +47,7 @@ enum class OperationCode {
FTrunc, /// (MetaArithmetic, float a) -> float
FCastInteger, /// (MetaArithmetic, int a) -> float
FCastUInteger, /// (MetaArithmetic, uint a) -> float
+ FSwizzleAdd, /// (float a, float b, uint mask) -> float
IAdd, /// (MetaArithmetic, int a, int b) -> int
IMul, /// (MetaArithmetic, int a, int b) -> int
@@ -181,15 +182,8 @@ enum class OperationCode {
VoteAny, /// (bool) -> bool
VoteEqual, /// (bool) -> bool
- ShuffleIndexed, /// (uint value, uint index, uint width) -> uint
- ShuffleUp, /// (uint value, uint index, uint width) -> uint
- ShuffleDown, /// (uint value, uint index, uint width) -> uint
- ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
-
- InRangeShuffleIndexed, /// (uint index, uint width) -> bool
- InRangeShuffleUp, /// (uint index, uint width) -> bool
- InRangeShuffleDown, /// (uint index, uint width) -> bool
- InRangeShuffleButterfly, /// (uint index, uint width) -> bool
+ ThreadId, /// () -> uint
+ ShuffleIndexed, /// (uint value, uint index) -> uint
Amount,
};
@@ -230,62 +224,49 @@ using NodeBlock = std::vector<Node>;
class Sampler {
public:
/// This constructor is for bound samplers
- explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_bindless{false} {}
+ constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow)
+ : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
/// This constructor is for bindless samplers
- explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
- : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
-
- /// This constructor is for serialization/deserialization
- explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow, bool is_bindless)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_bindless{is_bindless} {}
-
- std::size_t GetOffset() const {
+ constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow)
+ : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
+ is_shadow{is_shadow}, is_bindless{true} {}
+
+ constexpr u32 GetIndex() const {
+ return index;
+ }
+
+ constexpr u32 GetOffset() const {
return offset;
}
- std::size_t GetIndex() const {
- return index;
+ constexpr u32 GetBuffer() const {
+ return buffer;
}
- Tegra::Shader::TextureType GetType() const {
+ constexpr Tegra::Shader::TextureType GetType() const {
return type;
}
- bool IsArray() const {
+ constexpr bool IsArray() const {
return is_array;
}
- bool IsShadow() const {
+ constexpr bool IsShadow() const {
return is_shadow;
}
- bool IsBindless() const {
+ constexpr bool IsBindless() const {
return is_bindless;
}
- std::pair<u32, u32> GetBindlessCBuf() const {
- return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
- }
-
- bool operator<(const Sampler& rhs) const {
- return std::tie(index, offset, type, is_array, is_shadow, is_bindless) <
- std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow,
- rhs.is_bindless);
- }
-
private:
- /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
- /// instruction.
- std::size_t offset{};
- std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
+ u32 index{}; ///< Emulated index given for this sampler.
+ u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
+ u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
+
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
@@ -294,18 +275,13 @@ private:
class Image final {
public:
- constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
- : offset{offset}, index{index}, type{type}, is_bindless{false} {}
+ /// This constructor is for bound images
+ constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type)
+ : index{index}, offset{offset}, type{type} {}
- constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::ImageType type)
- : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_bindless{true} {}
-
- constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
- bool is_bindless, bool is_written, bool is_read, bool is_atomic)
- : offset{offset}, index{index}, type{type}, is_bindless{is_bindless},
- is_written{is_written}, is_read{is_read}, is_atomic{is_atomic} {}
+ /// This constructor is for bindless images
+ constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type)
+ : index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {}
void MarkWrite() {
is_written = true;
@@ -321,12 +297,16 @@ public:
is_atomic = true;
}
- constexpr std::size_t GetOffset() const {
+ constexpr u32 GetIndex() const {
+ return index;
+ }
+
+ constexpr u32 GetOffset() const {
return offset;
}
- constexpr std::size_t GetIndex() const {
- return index;
+ constexpr u32 GetBuffer() const {
+ return buffer;
}
constexpr Tegra::Shader::ImageType GetType() const {
@@ -349,18 +329,11 @@ public:
return is_atomic;
}
- constexpr std::pair<u32, u32> GetBindlessCBuf() const {
- return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
- }
-
- constexpr bool operator<(const Image& rhs) const {
- return std::tie(offset, index, type, is_bindless) <
- std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
- }
-
private:
- u64 offset{};
- std::size_t index{};
+ u32 index{};
+ u32 offset{};
+ u32 buffer{};
+
Tegra::Shader::ImageType type{};
bool is_bindless{};
bool is_written{};
@@ -410,7 +383,7 @@ public:
explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
explicit OperationNode(OperationCode code, Meta meta)
- : OperationNode(code, meta, std::vector<Node>{}) {}
+ : OperationNode(code, std::move(meta), std::vector<Node>{}) {}
explicit OperationNode(OperationCode code, std::vector<Node> operands)
: OperationNode(code, Meta{}, std::move(operands)) {}
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 2c357f310..1d9825c76 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -2,8 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
+#include <array>
#include <cmath>
-#include <unordered_map>
#include "common/assert.h"
#include "common/common_types.h"
@@ -22,8 +23,9 @@ using Tegra::Shader::PredCondition;
using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
-ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size)
- : program_code{program_code}, main_offset{main_offset}, program_size{size} {
+ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
+ ConstBufferLocker& locker)
+ : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
Decode();
}
@@ -137,7 +139,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
}
-Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
+Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
const Node node = MakeNode<InternalFlagNode>(flag);
if (negated) {
return Operation(OperationCode::LogicalNegate, node);
@@ -269,21 +271,24 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
}
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
- const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
- {PredCondition::LessThan, OperationCode::LogicalFLessThan},
- {PredCondition::Equal, OperationCode::LogicalFEqual},
- {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
- {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
- {PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
- {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
- {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
- {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
- {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
- {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
- {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}};
-
- const auto comparison{PredicateComparisonTable.find(condition)};
- UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+ static constexpr std::array comparison_table{
+ std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan},
+ std::pair{PredCondition::Equal, OperationCode::LogicalFEqual},
+ std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
+ std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
+ std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
+ std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
+ std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
+ std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
+ std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
+ std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
+ std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual},
+ };
+
+ const auto comparison =
+ std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+ [condition](const auto entry) { return condition == entry.first; });
+ UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
"Unknown predicate comparison operation");
Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
@@ -304,21 +309,24 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
Node op_b) {
- const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
- {PredCondition::LessThan, OperationCode::LogicalILessThan},
- {PredCondition::Equal, OperationCode::LogicalIEqual},
- {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
- {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
- {PredCondition::NotEqual, OperationCode::LogicalINotEqual},
- {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
- {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
- {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
- {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
- {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
- {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}};
-
- const auto comparison{PredicateComparisonTable.find(condition)};
- UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+ static constexpr std::array comparison_table{
+ std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan},
+ std::pair{PredCondition::Equal, OperationCode::LogicalIEqual},
+ std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual},
+ std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
+ std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual},
+ std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
+ std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
+ std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
+ std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
+ std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
+ std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual},
+ };
+
+ const auto comparison =
+ std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+ [condition](const auto entry) { return condition == entry.first; });
+ UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
"Unknown predicate comparison operation");
Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
@@ -335,45 +343,52 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
/// Builds the comparison node for `condition` applied to `op_a` and `op_b`, selecting one of
/// the Logical2H* operation codes.
///
/// This hunk replaces the non-static `std::unordered_map` local (built on every call) with a
/// `static constexpr std::array` of (condition, operation) pairs that is searched linearly.
/// In this table the *WithNan conditions map to dedicated Logical2H*WithNan operation codes.
///
/// NOTE(review): when no entry matches, `comparison` equals `cend()` and is still dereferenced
/// on the return line after UNIMPLEMENTED_IF_MSG; whether that macro aborts is not visible
/// here -- confirm.
Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
Node op_b) {
- const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
- {PredCondition::LessThan, OperationCode::Logical2HLessThan},
- {PredCondition::Equal, OperationCode::Logical2HEqual},
- {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
- {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
- {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
- {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
- {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
- {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
- {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
- {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
- {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}};
-
- const auto comparison{PredicateComparisonTable.find(condition)};
- UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+ static constexpr std::array comparison_table{
+ std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan},
+ std::pair{PredCondition::Equal, OperationCode::Logical2HEqual},
+ std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
+ std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
+ std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
+ std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
+ std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
+ std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
+ std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
+ std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
+ std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan},
+ };
+
// Linear scan over the eleven-entry table; the lambda receives each pair by value.
+ const auto comparison =
+ std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+ [condition](const auto entry) { return condition == entry.first; });
+ UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
"Unknown predicate comparison operation");
// The operation node is created with NO_PRECISE and takes ownership of both operands.
return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
}
/// Translates the predicate combiner `operation` into the matching OperationCode.
///
/// The removed version looked the value up in a map keyed by PredOperation; the added version
/// indexes a `static constexpr std::array` with the enum's numeric value.
/// NOTE(review): the array order assumes PredOperation::And, Or and Xor have the values 0, 1
/// and 2 respectively -- confirm against the enum declaration.
OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
- const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
- {PredOperation::And, OperationCode::LogicalAnd},
- {PredOperation::Or, OperationCode::LogicalOr},
- {PredOperation::Xor, OperationCode::LogicalXor},
+ static constexpr std::array operation_table{
+ OperationCode::LogicalAnd,
+ OperationCode::LogicalOr,
+ OperationCode::LogicalXor,
};
- const auto op = PredicateOperationTable.find(operation);
- UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation");
- return op->second;
+ const auto index = static_cast<std::size_t>(operation);
// Values past the end of the table are reported and yield a value-initialized OperationCode.
+ if (index >= operation_table.size()) {
+ UNIMPLEMENTED_MSG("Unknown predicate operation.");
+ return {};
+ }
+
+ return operation_table[index];
}
/// Returns a node that evaluates condition code `cc` from the internal flags.
///
/// Only ConditionCode::NEU is handled: it yields the Zero internal flag with GetInternalFlag's
/// second argument (declared as `negated`) set to true. Any other code is reported as
/// unimplemented and falls back to a PredicateNode built for Pred::NeverExecute.
///
/// In this hunk the method becomes const, and the fallback constructs the PredicateNode
/// directly instead of going through GetPredicate.
-Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
+Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
switch (cc) {
case Tegra::Shader::ConditionCode::NEU:
return GetInternalFlag(InternalFlag::Zero, true);
default:
UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
- return GetPredicate(static_cast<u64>(Pred::NeverExecute));
+ return MakeNode<PredicateNode>(Pred::NeverExecute, false);
}
}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 6f666ee30..76a849818 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <list>
#include <map>
#include <optional>
#include <set>
@@ -15,6 +16,9 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
+#include "video_core/shader/ast.h"
+#include "video_core/shader/compiler_settings.h"
+#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/node.h"
namespace VideoCommon::Shader {
@@ -45,7 +49,7 @@ public:
}
/// Returns max_offset plus the size of one float.
/// The added static_cast converts sizeof's std::size_t result to u32 explicitly before the
/// addition (assuming max_offset is itself a u32 -- confirm), matching the u32 return type.
u32 GetSize() const {
- return max_offset + sizeof(float);
+ return max_offset + static_cast<u32>(sizeof(float));
}
u32 GetMaxOffset() const {
@@ -64,7 +68,8 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
/// Constructs a ShaderIR over `program_code`.
/// The added declaration drops the `size` argument and instead takes the compiler settings by
/// value and a ConstBufferLocker by non-const reference.
/// NOTE(review): the class holds a `ConstBufferLocker&` member, so the locker presumably has to
/// outlive this object -- confirm in the constructor definition.
- explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size);
+ explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
+ ConstBufferLocker& locker);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -91,11 +96,11 @@ public:
return used_cbufs;
}
/// Returns a const reference to used_samplers.
/// The return type follows the member's change from std::set<Sampler> to std::list<Sampler>.
- const std::set<Sampler>& GetSamplers() const {
+ const std::list<Sampler>& GetSamplers() const {
return used_samplers;
}
/// Returns a const reference to used_images.
/// The return type follows the member's change from std::map<u64, Image> to std::list<Image>,
/// so the result is no longer keyed by a u64.
- const std::map<u64, Image>& GetImages() const {
+ const std::list<Image>& GetImages() const {
return used_images;
}
@@ -144,11 +149,38 @@ public:
return disable_flow_stack;
}
// The two removed lines are the old ConvertAddressToNvidiaSpace; it is re-added later in this
// hunk with an explicit u32 cast.
- u32 ConvertAddressToNvidiaSpace(const u32 address) const {
- return (address - main_offset) * sizeof(Tegra::Shader::Instruction);
/// Returns the `decompiled` flag.
/// NOTE(review): what sets the flag is not visible in this header -- confirm in the
/// implementation.
+ bool IsDecompiled() const {
+ return decompiled;
}
/// Returns a const reference to the program_manager member.
+ const ASTManager& GetASTManager() const {
+ return program_manager;
+ }
+
/// Returns, by value, the ASTNode reported by program_manager.GetProgram().
+ ASTNode GetASTProgram() const {
+ return program_manager.GetProgram();
+ }
+
/// Returns the value of program_manager.GetVariables() as a u32.
/// NOTE(review): presumably the number of variables created by the AST pass -- confirm against
/// ASTManager.
+ u32 GetASTNumVariables() const {
+ return program_manager.GetVariables();
+ }
+
/// Converts `address` to a byte offset: its distance from main_offset multiplied by
/// sizeof(Tegra::Shader::Instruction), with the size cast to u32 explicitly.
/// NOTE(review): assumes address >= main_offset; a smaller value would wrap around if u32 is
/// unsigned.
+ u32 ConvertAddressToNvidiaSpace(u32 address) const {
+ return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
+ }
+
/// Declared in the public section and const as of this change; the private, non-const
/// declaration is removed further down in this diff.
+ /// Returns a condition code evaluated from internal flags
+ Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
+
private:
+ friend class ASTDecoder;
+
/// Bundles the sampler properties (texture type, array flag, shadow flag) that GetSampler and
/// GetBindlessSampler receive as a single std::optional<SamplerInfo> after this change,
/// instead of three separate parameters.
+ struct SamplerInfo {
+ Tegra::Shader::TextureType type;
+ bool is_array;
+ bool is_shadow;
+ };
+
void Decode();
NodeBlock DecodeRange(u32 begin, u32 end);
@@ -213,7 +245,7 @@ private:
/// Generates a node representing an output attribute. Keeps track of used attributes.
Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
/// Generates a node representing an internal flag
/// Const as of this change; the const GetConditionCode() definition calls it.
- Node GetInternalFlag(InternalFlag flag, bool negated = false);
+ Node GetInternalFlag(InternalFlag flag, bool negated = false) const;
/// Generates a node representing a local memory address
Node GetLocalMemory(Node address);
/// Generates a node representing a shared memory address
@@ -271,17 +303,13 @@ private:
/// Returns a predicate combiner operation
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
- /// Returns a condition code evaluated from internal flags
- Node GetConditionCode(Tegra::Shader::ConditionCode cc);
-
/// Accesses a texture sampler
const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
- Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
+ std::optional<SamplerInfo> sampler_info);
// Accesses a texture sampler for a bindless texture.
const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
- Tegra::Shader::TextureType type, bool is_array,
- bool is_shadow);
+ std::optional<SamplerInfo> sampler_info);
/// Accesses an image.
Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
@@ -289,9 +317,6 @@ private:
/// Accesses a bindless image sampler.
Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
- /// Tries to access an existing image, updating it's state as needed
- Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type);
-
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -302,7 +327,7 @@ private:
const Node4& components);
/// NOTE(review): `ignore_mask` is new and defaults to false, so existing call sites compile
/// unchanged; its effect is defined in the implementation, which is not visible here.
void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components);
+ const Node4& components, bool ignore_mask = false);
void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components);
@@ -316,7 +341,7 @@ private:
bool is_array);
/// NOTE(review): gains a trailing `is_bindless` flag with no default value, so every caller
/// has to pass it explicitly.
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- bool depth_compare, bool is_array, bool is_aoffi);
+ bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless);
Node4 GetTldCode(Tegra::Shader::Instruction instr);
@@ -351,12 +376,16 @@ private:
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
s64 cursor) const;
/// Renamed from TrackAndGetGlobalMemory in this change; the parameter list and the returned
/// (Node, Node, GlobalMemoryBase) tuple type are unchanged.
- std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(
- NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write);
+ std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
+ Tegra::Shader::Instruction instr,
+ bool is_write);
const ProgramCode& program_code;
const u32 main_offset;
- const std::size_t program_size;
+ const CompilerSettings settings;
+ ConstBufferLocker& locker;
+
+ bool decompiled{};
bool disable_flow_stack{};
u32 coverage_begin{};
@@ -364,14 +393,15 @@ private:
std::map<u32, NodeBlock> basic_blocks;
NodeBlock global_code;
+ ASTManager program_manager{true, true};
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
- std::set<Sampler> used_samplers;
- std::map<u64, Image> used_images;
+ std::list<Sampler> used_samplers;
+ std::list<Image> used_images;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
bool uses_layer{};