From 8af6e6a05207b1c9736bd80a89ec3aed1f96dfea Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 24 Jun 2019 19:46:49 -0400 Subject: shader_ir: Implement a new shader scanner --- src/video_core/shader/decode.cpp | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'src/video_core/shader/decode.cpp') diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2c9ff28f2..7f433c56b 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/shader/control_flow.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" @@ -51,25 +52,31 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - std::set labels; - const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); - if (exit_method != ExitMethod::AlwaysEnd) { - UNREACHABLE_MSG("Program does not always end"); - } - - if (labels.empty()) { - basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); + ShaderCharacteristics shader_info{}; + bool can_proceed = ScanFlow(program_code, MAX_PROGRAM_LENGTH, main_offset, shader_info); + if (can_proceed) { + coverage_begin = shader_info.start; + coverage_end = shader_info.end; + if (shader_info.decompilable) { + return; + } + // we can't decompile it, fallback to standard method + for (const auto& block : shader_info.blocks) { + basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); + } return; } - - labels.insert(main_offset); - - for (const u32 label : labels) { - const auto next_it = labels.lower_bound(label + 1); - const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; - - basic_blocks.insert({label, DecodeRange(label, next_label)}); + LOG_CRITICAL(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); + + // Now we need to deal with an undecompilable shader. We need to brute force + // a shader that captures every position. + coverage_begin = shader_info.start; + const u32 shader_end = static_cast(MAX_PROGRAM_LENGTH); + coverage_end = shader_end; + for (u32 label = main_offset; label < shader_end; label++) { + basic_blocks.insert({label, DecodeRange(label, label + 1)}); } + return; } ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set& labels) { -- cgit v1.2.3 From c218ae4b022a9b47366e88441220fa6c66bcae4b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 24 Jun 2019 21:01:49 -0400 Subject: shader_ir: Remove the old scanner. --- src/video_core/shader/decode.cpp | 66 ---------------------------------------- 1 file changed, 66 deletions(-) (limited to 'src/video_core/shader/decode.cpp') diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 7f433c56b..65029d35e 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -22,20 +22,6 @@ using Tegra::Shader::OpCode; namespace { -/// Merges exit method of two parallel branches. -constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { - if (a == ExitMethod::Undetermined) { - return b; - } - if (b == ExitMethod::Undetermined) { - return a; - } - if (a == b) { - return a; - } - return ExitMethod::Conditional; -} - /** * Returns whether the instruction at the specified offset is a 'sched' instruction. * Sched instructions always appear before a sequence of 3 instructions. @@ -79,58 +65,6 @@ void ShaderIR::Decode() { return; } -ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set& labels) { - const auto [iter, inserted] = - exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); - ExitMethod& exit_method = iter->second; - if (!inserted) - return exit_method; - - for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) { - coverage_begin = std::min(coverage_begin, offset); - coverage_end = std::max(coverage_end, offset + 1); - - const Instruction instr = {program_code[offset]}; - const auto opcode = OpCode::Decode(instr); - if (!opcode) - continue; - switch (opcode->get().GetId()) { - case OpCode::Id::EXIT: { - // The EXIT instruction can be predicated, which means that the shader can conditionally - // end on this instruction. We have to consider the case where the condition is not met - // and check the exit method of that other basic block. - using Tegra::Shader::Pred; - if (instr.pred.pred_index == static_cast(Pred::UnusedIndex)) { - return exit_method = ExitMethod::AlwaysEnd; - } else { - const ExitMethod not_met = Scan(offset + 1, end, labels); - return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); - } - } - case OpCode::Id::BRA: { - const u32 target = offset + instr.bra.GetBranchTarget(); - labels.insert(target); - const ExitMethod no_jmp = Scan(offset + 1, end, labels); - const ExitMethod jmp = Scan(target, end, labels); - return exit_method = ParallelExit(no_jmp, jmp); - } - case OpCode::Id::SSY: - case OpCode::Id::PBK: { - // The SSY and PBK use a similar encoding as the BRA instruction. - UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, - "Constant buffer branching is not supported"); - const u32 target = offset + instr.bra.GetBranchTarget(); - labels.insert(target); - // Continue scanning for an exit method. - break; - } - default: - break; - } - } - return exit_method = ExitMethod::AlwaysReturn; -} - NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { NodeBlock basic_block; for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { -- cgit v1.2.3 From 459fce3a8f26241ff2a68c323e75fb70e7e1ba79 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 07:57:32 -0400 Subject: shader_ir: propagate shader size to the IR --- src/video_core/shader/decode.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/shader/decode.cpp') diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 65029d35e..09f55bd21 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,7 +39,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); ShaderCharacteristics shader_info{}; - bool can_proceed = ScanFlow(program_code, MAX_PROGRAM_LENGTH, main_offset, shader_info); + bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info); if (can_proceed) { coverage_begin = shader_info.start; coverage_end = shader_info.end; @@ -52,12 +52,12 @@ void ShaderIR::Decode() { } return; } - LOG_CRITICAL(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); + LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); // Now we need to deal with an undecompilable shader. We need to brute force // a shader that captures every position. coverage_begin = shader_info.start; - const u32 shader_end = static_cast(MAX_PROGRAM_LENGTH); + const u32 shader_end = static_cast(program_size / sizeof(u64)); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); -- cgit v1.2.3 From 926b80102f1c00675a9f3956258a066bfe0c3642 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 11:10:45 -0400 Subject: shader_ir: Decompile Flow Stack --- src/video_core/shader/decode.cpp | 46 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'src/video_core/shader/decode.cpp') diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 09f55bd21..1a74b70cb 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -44,6 +44,17 @@ void ShaderIR::Decode() { coverage_begin = shader_info.start; coverage_end = shader_info.end; if (shader_info.decompilable) { + std::list& blocks = shader_info.blocks; + for (auto& block : blocks) { + NodeBlock nodes; + if (!block.ignore_branch) { + nodes = DecodeRange(block.start, block.end); + InsertControlFlow(nodes, block); + } else { + nodes = DecodeRange(block.start, block.end + 1); + } + basic_blocks.insert({block.start, nodes}); + } return; } // we can't decompile it, fallback to standard method @@ -73,6 +84,41 @@ NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { return basic_block; } +void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { + auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { + Node result = n; + if (cond.cc != ConditionCode::T) { + result = Conditional(GetConditionCode(cond.cc), {result}); + } + if (cond.predicate != Pred::UnusedIndex) { + u32 pred = static_cast(cond.predicate); + bool is_neg = pred > 7; + if (is_neg) + pred -= 8; + result = Conditional(GetPredicate(pred, is_neg), {result}); + } + return result; + }); + if (block.branch.address < 0) { + if (block.branch.kills) { + Node n = Operation(OperationCode::Discard); + n = apply_conditions(block.branch.cond, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Exit); + n = apply_conditions(block.branch.cond, n); + bb.push_back(n); + global_code.push_back(n); + return; + } + Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); + n = apply_conditions(block.branch.cond, n); + bb.push_back(n); + global_code.push_back(n); +} + u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { // Ignore sched instructions when generating code. if (IsSchedInstruction(pc, main_offset)) { -- cgit v1.2.3 From d5533b440c764093c04a4859b30fc78ddb0e0bbe Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 13:03:51 -0400 Subject: shader_ir: Unify blocks in decompiled shaders. --- src/video_core/shader/decode.cpp | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'src/video_core/shader/decode.cpp') diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 1a74b70cb..f9b1960da 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -38,32 +38,47 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); + disable_flow_stack = false; ShaderCharacteristics shader_info{}; bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info); if (can_proceed) { coverage_begin = shader_info.start; coverage_end = shader_info.end; if (shader_info.decompilable) { + disable_flow_stack = true; + auto insert_block = ([this](NodeBlock& nodes, u32 label) { + if (label == exit_branch) { + return; + } + basic_blocks.insert({label, nodes}); + }); std::list& blocks = shader_info.blocks; + NodeBlock current_block; + u32 current_label = exit_branch; for (auto& block : blocks) { - NodeBlock nodes; + if (shader_info.labels.count(block.start) != 0) { + insert_block(current_block, current_label); + current_block.clear(); + current_label = block.start; + } if (!block.ignore_branch) { - nodes = DecodeRange(block.start, block.end); - InsertControlFlow(nodes, block); + DecodeRangeInner(current_block, block.start, block.end); + InsertControlFlow(current_block, block); } else { - nodes = DecodeRange(block.start, block.end + 1); + DecodeRangeInner(current_block, block.start, block.end + 1); } - basic_blocks.insert({block.start, nodes}); } + insert_block(current_block, current_label); return; } + LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method"); // we can't decompile it, fallback to standard method for (const auto& block : shader_info.blocks) { basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); } return; } - LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); + LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling"); // Now we need to deal with an undecompilable shader. We need to brute force // a shader that captures every position. @@ -78,10 +93,14 @@ void ShaderIR::Decode() { NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { NodeBlock basic_block; + DecodeRangeInner(basic_block, begin, end); + return basic_block; +} + +void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { - pc = DecodeInstr(basic_block, pc); + pc = DecodeInstr(bb, pc); } - return basic_block; } void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { -- cgit v1.2.3 From 01b21ee1e8e7455dd84ee7f22d33426caaaafdb3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 20:15:40 -0400 Subject: shader_ir: Corrections, documenting and asserting control_flow --- src/video_core/shader/decode.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/video_core/shader/decode.cpp') diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index f9b1960da..b4a282cbd 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -46,7 +46,7 @@ void ShaderIR::Decode() { coverage_end = shader_info.end; if (shader_info.decompilable) { disable_flow_stack = true; - auto insert_block = ([this](NodeBlock& nodes, u32 label) { + const auto insert_block = ([this](NodeBlock& nodes, u32 label) { if (label == exit_branch) { return; } @@ -88,7 +88,6 @@ void ShaderIR::Decode() { for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); } - return; } NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { @@ -104,16 +103,17 @@ void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { } void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { - auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { + const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node { Node result = n; if (cond.cc != ConditionCode::T) { result = Conditional(GetConditionCode(cond.cc), {result}); } if (cond.predicate != Pred::UnusedIndex) { u32 pred = static_cast(cond.predicate); - bool is_neg = pred > 7; - if (is_neg) + const bool is_neg = pred > 7; + if (is_neg) { pred -= 8; + } result = Conditional(GetPredicate(pred, is_neg), {result}); } return result; -- cgit v1.2.3 From d45fed303055fa699377bedcc3a7973bd03b7870 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 20:40:38 -0400 Subject: shader_ir: Remove unnecessary constructors and use optional for ScanFlow result --- src/video_core/shader/decode.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/video_core/shader/decode.cpp') diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index b4a282cbd..15cb33bbf 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,9 +39,9 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); disable_flow_stack = false; - ShaderCharacteristics shader_info{}; - bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info); - if (can_proceed) { + const auto info = ScanFlow(program_code, program_code.size(), main_offset); + if (info) { + const auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; if (shader_info.decompilable) { @@ -52,7 +52,7 @@ void ShaderIR::Decode() { } basic_blocks.insert({label, nodes}); }); - std::list& blocks = shader_info.blocks; + const auto& blocks = shader_info.blocks; NodeBlock current_block; u32 current_label = exit_branch; for (auto& block : blocks) { @@ -82,7 +82,7 @@ void ShaderIR::Decode() { // Now we need to deal with an undecompilable shader. We need to brute force // a shader that captures every position. - coverage_begin = shader_info.start; + coverage_begin = main_offset; const u32 shader_end = static_cast(program_size / sizeof(u64)); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { -- cgit v1.2.3 From cfb3db1a32975583b94a0df5f3ff0020254208c0 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 20:56:04 -0400 Subject: shader_ir: Correct max sizing --- src/video_core/shader/decode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/shader/decode.cpp') diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 15cb33bbf..b0bd6630f 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,7 +39,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); disable_flow_stack = false; - const auto info = ScanFlow(program_code, program_code.size(), main_offset); + const auto info = ScanFlow(program_code, program_size, main_offset); if (info) { const auto& shader_info = *info; coverage_begin = shader_info.start; -- cgit v1.2.3 From 34357b110c3f04f6b98ca586fd776b0df569b6d8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 26 Jun 2019 12:19:43 -0400 Subject: shader_ir: Correct parsing of scheduling instructions and correct sizing --- src/video_core/shader/decode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/shader/decode.cpp') diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index b0bd6630f..07a154d77 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,7 +39,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); disable_flow_stack = false; - const auto info = ScanFlow(program_code, program_size, main_offset); + const auto info = ScanFlow(program_code, MAX_PROGRAM_LENGTH * sizeof(u64), main_offset); if (info) { const auto& shader_info = *info; coverage_begin = shader_info.start; -- cgit v1.2.3 From 2de764931141314357944627524767476d6d2cf9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 29 Jun 2019 14:01:44 -0400 Subject: shader_ir: limit explorastion to best known program size. --- src/video_core/shader/decode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/shader/decode.cpp') diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 07a154d77..b0bd6630f 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,7 +39,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); disable_flow_stack = false; - const auto info = ScanFlow(program_code, MAX_PROGRAM_LENGTH * sizeof(u64), main_offset); + const auto info = ScanFlow(program_code, program_size, main_offset); if (info) { const auto& shader_info = *info; coverage_begin = shader_info.start; -- cgit v1.2.3 From f2549739d1166d9177bfff3a6af150266ba5309f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 5 Jul 2019 14:13:14 -0400 Subject: shader_ir: Add comments on missing instruction. Also shows Nvidia's address space on comments. --- src/video_core/shader/decode.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/video_core/shader/decode.cpp') diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index b0bd6630f..29c8895c5 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -146,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); + const u32 nv_address = ConvertAddressToNvidiaSpace(pc); // Decoding failure if (!opcode) { UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); + bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", + nv_address, instr.value))); return pc + 1; } - bb.push_back( - Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); + bb.push_back(Comment( + fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); using Tegra::Shader::Pred; UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, -- cgit v1.2.3