aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/control_flow.cpp214
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp11
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp4
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp8
-rw-r--r--src/video_core/shader/decode/ffma.cpp4
-rw-r--r--src/video_core/shader/decode/half_set.cpp4
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp4
-rw-r--r--src/video_core/shader/decode/image.cpp50
-rw-r--r--src/video_core/shader/decode/texture.cpp101
-rw-r--r--src/video_core/shader/node.h101
-rw-r--r--src/video_core/shader/shader_ir.h14
11 files changed, 234 insertions, 281 deletions
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index d47c63d9f..b427ac873 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -16,7 +16,9 @@
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
+
namespace {
+
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
@@ -68,15 +70,15 @@ struct CFGRebuildState {
const ProgramCode& program_code;
ConstBufferLocker& locker;
u32 start{};
- std::vector<BlockInfo> block_info{};
- std::list<u32> inspect_queries{};
- std::list<Query> queries{};
- std::unordered_map<u32, u32> registered{};
- std::set<u32> labels{};
- std::map<u32, u32> ssy_labels{};
- std::map<u32, u32> pbk_labels{};
- std::unordered_map<u32, BlockStack> stacks{};
- ASTManager* manager;
+ std::vector<BlockInfo> block_info;
+ std::list<u32> inspect_queries;
+ std::list<Query> queries;
+ std::unordered_map<u32, u32> registered;
+ std::set<u32> labels;
+ std::map<u32, u32> ssy_labels;
+ std::map<u32, u32> pbk_labels;
+ std::unordered_map<u32, BlockStack> stacks;
+ ASTManager* manager{};
};
enum class BlockCollision : u32 { None, Found, Inside };
@@ -109,7 +111,7 @@ BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
}
Pred GetPredicate(u32 index, bool negated) {
- return static_cast<Pred>(index + (negated ? 8 : 0));
+ return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
}
/**
@@ -136,15 +138,13 @@ struct BranchIndirectInfo {
s32 relative_position{};
};
-std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state,
- u32 start_address, u32 current_position) {
- const u32 shader_start = state.start;
- u32 pos = current_position;
- BranchIndirectInfo result{};
- u64 track_register = 0;
+struct BufferInfo {
+ u32 index;
+ u32 offset;
+};
- // Step 0 Get BRX Info
- const Instruction instr = {state.program_code[pos]};
+std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
+ const Instruction instr = state.program_code[pos];
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() != OpCode::Id::BRX) {
return std::nullopt;
@@ -152,86 +152,94 @@ std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState&
if (instr.brx.constant_buffer != 0) {
return std::nullopt;
}
- track_register = instr.gpr8.Value();
- result.relative_position = instr.brx.GetBranchExtend();
- pos--;
- bool found_track = false;
+ --pos;
+ return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
+}
- // Step 1 Track LDC
- while (pos >= shader_start) {
- if (IsSchedInstruction(pos, shader_start)) {
- pos--;
+template <typename Result, typename TestCallable, typename PackCallable>
+// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
+// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
+std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
+ PackCallable pack) {
+ for (; pos >= state.start; --pos) {
+ if (IsSchedInstruction(pos, state.start)) {
continue;
}
- const Instruction instr = {state.program_code[pos]};
+ const Instruction instr = state.program_code[pos];
const auto opcode = OpCode::Decode(instr);
- if (opcode->get().GetId() == OpCode::Id::LD_C) {
- if (instr.gpr0.Value() == track_register &&
- instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) {
- result.buffer = instr.cbuf36.index.Value();
- result.offset = static_cast<u32>(instr.cbuf36.GetOffset());
- track_register = instr.gpr8.Value();
- pos--;
- found_track = true;
- break;
- }
+ if (!opcode) {
+ continue;
+ }
+ if (test(instr, opcode->get())) {
+ --pos;
+ return std::make_optional(pack(instr, opcode->get()));
}
- pos--;
}
+ return std::nullopt;
+}
- if (!found_track) {
- return std::nullopt;
- }
- found_track = false;
+std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
+ u64 brx_tracked_register) {
+ return TrackInstruction<std::pair<BufferInfo, u64>>(
+ state, pos,
+ [brx_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::LD_C &&
+ instr.gpr0.Value() == brx_tracked_register &&
+ instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
+ },
+ [](auto instr, const auto& opcode) {
+ const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
+ static_cast<u32>(instr.cbuf36.GetOffset())};
+ return std::make_pair(info, instr.gpr8.Value());
+ });
+}
- // Step 2 Track SHL
- while (pos >= shader_start) {
- if (IsSchedInstruction(pos, shader_start)) {
- pos--;
- continue;
- }
- const Instruction instr = state.program_code[pos];
- const auto opcode = OpCode::Decode(instr);
- if (opcode->get().GetId() == OpCode::Id::SHL_IMM) {
- if (instr.gpr0.Value() == track_register) {
- track_register = instr.gpr8.Value();
- pos--;
- found_track = true;
- break;
- }
- }
- pos--;
+std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
+ u64 ldc_tracked_register) {
+ return TrackInstruction<u64>(state, pos,
+ [ldc_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::SHL_IMM &&
+ instr.gpr0.Value() == ldc_tracked_register;
+ },
+ [](auto instr, const auto&) { return instr.gpr8.Value(); });
+}
+
+std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
+ u64 shl_tracked_register) {
+ return TrackInstruction<u32>(state, pos,
+ [shl_tracked_register](auto instr, const auto& opcode) {
+ return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
+ instr.gpr0.Value() == shl_tracked_register;
+ },
+ [](auto instr, const auto&) {
+ return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+ });
+}
+
+std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
+ const auto brx_info = GetBRXInfo(state, pos);
+ if (!brx_info) {
+ return std::nullopt;
}
+ const auto [relative_position, brx_tracked_register] = *brx_info;
- if (!found_track) {
+ const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
+ if (!ldc_info) {
return std::nullopt;
}
- found_track = false;
+ const auto [buffer_info, ldc_tracked_register] = *ldc_info;
- // Step 3 Track IMNMX
- while (pos >= shader_start) {
- if (IsSchedInstruction(pos, shader_start)) {
- pos--;
- continue;
- }
- const Instruction instr = state.program_code[pos];
- const auto opcode = OpCode::Decode(instr);
- if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) {
- if (instr.gpr0.Value() == track_register) {
- track_register = instr.gpr8.Value();
- result.entries = instr.alu.GetSignedImm20_20() + 1;
- pos--;
- found_track = true;
- break;
- }
- }
- pos--;
+ const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
+ if (!shl_tracked_register) {
+ return std::nullopt;
}
- if (!found_track) {
+ const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
+ if (!entries) {
return std::nullopt;
}
- return result;
+
+ return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
}
std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
@@ -420,30 +428,30 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
break;
}
case OpCode::Id::BRX: {
- auto tmp = TrackBranchIndirectInfo(state, address, offset);
- if (tmp) {
- auto result = *tmp;
- std::vector<CaseBranch> branches{};
- s32 pc_target = offset + result.relative_position;
- for (u32 i = 0; i < result.entries; i++) {
- auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
- if (!k) {
- return {ParseResult::AbnormalFlow, parse_info};
- }
- u32 value = *k;
- u32 target = static_cast<u32>((value >> 3) + pc_target);
- insert_label(state, target);
- branches.emplace_back(value, target);
- }
- parse_info.end_address = offset;
- parse_info.branch_info = MakeBranchInfo<MultiBranch>(
- static_cast<u32>(instr.gpr8.Value()), std::move(branches));
-
- return {ParseResult::ControlCaught, parse_info};
- } else {
+ const auto tmp = TrackBranchIndirectInfo(state, offset);
+ if (!tmp) {
LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
+ return {ParseResult::AbnormalFlow, parse_info};
}
- return {ParseResult::AbnormalFlow, parse_info};
+
+ const auto result = *tmp;
+ const s32 pc_target = offset + result.relative_position;
+ std::vector<CaseBranch> branches;
+ for (u32 i = 0; i < result.entries; i++) {
+ auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+ if (!key) {
+ return {ParseResult::AbnormalFlow, parse_info};
+ }
+ u32 value = *key;
+ u32 target = static_cast<u32>((value >> 3) + pc_target);
+ insert_label(state, target);
+ branches.emplace_back(value, target);
+ }
+ parse_info.end_address = offset;
+ parse_info.branch_info = MakeBranchInfo<MultiBranch>(
+ static_cast<u32>(instr.gpr8.Value()), std::move(branches));
+
+ return {ParseResult::ControlCaught, parse_info};
}
default:
break;
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 1473c282a..fcedd2af6 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -43,12 +43,12 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
case OpCode::Id::FMUL_IMM: {
// FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
if (instr.fmul.tab5cb8_2 != 0) {
- LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
- instr.fmul.tab5cb8_2.Value());
+ LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
+ instr.fmul.tab5cb8_2.Value());
}
if (instr.fmul.tab5c68_0 != 1) {
- LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
- instr.fmul.tab5c68_0.Value());
+ LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
+ instr.fmul.tab5c68_0.Value());
}
op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
@@ -144,10 +144,11 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
case OpCode::Id::RRO_C:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {
+ LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
+
// Currently RRO is only implemented as a register move.
op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
SetRegister(bb, instr.gpr0, op_b);
- LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
break;
}
default:
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index b06cbe441..ee7d9a29d 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -21,8 +21,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
opcode->get().GetId() == OpCode::Id::HADD2_R) {
- if (instr.alu_half.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
}
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 6466fc011..d179b9873 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -19,12 +19,12 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
const auto opcode = OpCode::Decode(instr);
if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
- if (instr.alu_half_imm.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half_imm.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
} else {
- if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
}
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index ca2f39e8d..5973588d6 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -19,10 +19,10 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
if (instr.ffma.tab5980_0 != 1) {
- LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
+ LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
}
if (instr.ffma.tab5980_1 != 0) {
- LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
+ LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
}
const Node op_a = GetRegister(instr.gpr8);
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 48ca7a4af..848e46874 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -20,8 +20,8 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- if (instr.hset2.ftz != 0) {
- LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+ if (instr.hset2.ftz == 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
}
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index fec8f2dbe..310655619 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -19,7 +19,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- LOG_DEBUG(HW_GPU, "ftz={}", static_cast<u32>(instr.hsetp2.ftz));
+ if (instr.hsetp2.ftz != 0) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ }
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index b02d2cb95..d2fe4ec5d 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -143,39 +143,37 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
}
Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
- const auto offset{static_cast<std::size_t>(image.index.Value())};
- if (const auto existing_image = TryUseExistingImage(offset, type)) {
- return *existing_image;
+ const auto offset = static_cast<u32>(image.index.Value());
+
+ const auto it =
+ std::find_if(std::begin(used_images), std::end(used_images),
+ [offset](const Image& entry) { return entry.GetOffset() == offset; });
+ if (it != std::end(used_images)) {
+ ASSERT(!it->IsBindless() && it->GetType() == it->GetType());
+ return *it;
}
- const std::size_t next_index{used_images.size()};
- return used_images.emplace(offset, Image{offset, next_index, type}).first->second;
+ const auto next_index = static_cast<u32>(used_images.size());
+ return used_images.emplace_back(next_index, offset, type);
}
Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
- const Node image_register{GetRegister(reg)};
- const auto [base_image, cbuf_index, cbuf_offset]{
- TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
- const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
-
- if (const auto image = TryUseExistingImage(cbuf_key, type)) {
- return *image;
- }
-
- const std::size_t next_index{used_images.size()};
- return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type})
- .first->second;
-}
-
-Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type) {
- auto it = used_images.find(offset);
- if (it == used_images.end()) {
- return nullptr;
+ const Node image_register = GetRegister(reg);
+ const auto [base_image, buffer, offset] =
+ TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
+
+ const auto it =
+ std::find_if(std::begin(used_images), std::end(used_images),
+ [buffer = buffer, offset = offset](const Image& entry) {
+ return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+ });
+ if (it != std::end(used_images)) {
+ ASSERT(it->IsBindless() && it->GetType() == it->GetType());
+ return *it;
}
- auto& image = it->second;
- ASSERT(image.GetType() == type);
- return &image;
+ const auto next_index = static_cast<u32>(used_images.size());
+ return used_images.emplace_back(next_index, offset, buffer, type);
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0599ef34f..bb926a132 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -44,10 +44,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
bool is_bindless = false;
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
- if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
- }
-
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -62,10 +58,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
- }
-
const TextureType texture_type{instr.tex_b.texture_type};
const bool is_array = instr.tex_b.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -82,10 +74,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.texs.GetTextureProcessMode();
- if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
- }
-
const Node4 components =
GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
@@ -107,10 +95,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
"PTP is not implemented");
- if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
- }
-
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
: instr.tld4.UsesMiscMode(TextureMiscMode::DC);
@@ -119,15 +103,12 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
: instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
WriteTexInstructionFloat(
bb, instr,
- GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless), true);
+ GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless));
break;
}
case OpCode::Id::TLD4S: {
UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
- }
const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
@@ -164,10 +145,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
is_bindless = true;
[[fallthrough]];
case OpCode::Id::TXQ: {
- if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
- }
-
// TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
@@ -205,10 +182,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
- if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
- }
-
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
const auto& sampler =
@@ -254,25 +227,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
- if (instr.tld.nodep_flag) {
- LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
- }
-
WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
break;
}
case OpCode::Id::TLDS: {
- const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
+ const TextureType texture_type{instr.tlds.GetTextureType()};
const bool is_array{instr.tlds.IsArrayTexture()};
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
- if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
- LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
- }
-
const Node4 components = GetTldsCode(instr, texture_type, is_array);
if (instr.tlds.fp32_flag) {
@@ -293,84 +258,86 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
std::optional<SamplerInfo> sampler_info) {
const auto offset = static_cast<u32>(sampler.index.Value());
- Tegra::Shader::TextureType type;
+ TextureType type;
bool is_array;
bool is_shadow;
if (sampler_info) {
type = sampler_info->type;
is_array = sampler_info->is_array;
is_shadow = sampler_info->is_shadow;
- } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) {
+ } else if (const auto sampler = locker.ObtainBoundSampler(offset)) {
type = sampler->texture_type.Value();
is_array = sampler->is_array.Value() != 0;
is_shadow = sampler->is_shadow.Value() != 0;
} else {
- type = Tegra::Shader::TextureType::Texture2D;
+ LOG_WARNING(HW_GPU, "Unknown sampler info");
+ type = TextureType::Texture2D;
is_array = false;
is_shadow = false;
}
// If this sampler has already been used, return the existing mapping.
- const auto itr =
+ const auto it =
std::find_if(used_samplers.begin(), used_samplers.end(),
- [&](const Sampler& entry) { return entry.GetOffset() == offset; });
- if (itr != used_samplers.end()) {
- ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
- itr->IsShadow() == is_shadow);
- return *itr;
+ [offset](const Sampler& entry) { return entry.GetOffset() == offset; });
+ if (it != used_samplers.end()) {
+ ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+ it->IsShadow() == is_shadow);
+ return *it;
}
// Otherwise create a new mapping for this sampler
- const std::size_t next_index = used_samplers.size();
- const Sampler entry{offset, next_index, type, is_array, is_shadow};
- return *used_samplers.emplace(entry).first;
-} // namespace VideoCommon::Shader
+ const auto next_index = static_cast<u32>(used_samplers.size());
+ return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow));
+}
const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
std::optional<SamplerInfo> sampler_info) {
const Node sampler_register = GetRegister(reg);
- const auto [base_sampler, cbuf_index, cbuf_offset] =
+ const auto [base_sampler, buffer, offset] =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
ASSERT(base_sampler != nullptr);
- const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
- Tegra::Shader::TextureType type;
+
+ TextureType type;
bool is_array;
bool is_shadow;
if (sampler_info) {
type = sampler_info->type;
is_array = sampler_info->is_array;
is_shadow = sampler_info->is_shadow;
- } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) {
+ } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) {
type = sampler->texture_type.Value();
is_array = sampler->is_array.Value() != 0;
is_shadow = sampler->is_shadow.Value() != 0;
} else {
- type = Tegra::Shader::TextureType::Texture2D;
+ LOG_WARNING(HW_GPU, "Unknown sampler info");
+ type = TextureType::Texture2D;
is_array = false;
is_shadow = false;
}
// If this sampler has already been used, return the existing mapping.
- const auto itr =
+ const auto it =
std::find_if(used_samplers.begin(), used_samplers.end(),
- [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; });
- if (itr != used_samplers.end()) {
- ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
- itr->IsShadow() == is_shadow);
- return *itr;
+ [buffer = buffer, offset = offset](const Sampler& entry) {
+ return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+ });
+ if (it != used_samplers.end()) {
+ ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+ it->IsShadow() == is_shadow);
+ return *it;
}
// Otherwise create a new mapping for this sampler
- const std::size_t next_index = used_samplers.size();
- const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow};
- return *used_samplers.emplace(entry).first;
+ const auto next_index = static_cast<u32>(used_samplers.size());
+ return used_samplers.emplace_back(
+ Sampler(next_index, offset, buffer, type, is_array, is_shadow));
}
-void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
- bool is_tld4) {
+void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
u32 dest_elem = 0;
for (u32 elem = 0; elem < 4; ++elem) {
- if (!is_tld4 && !instr.tex.IsComponentEnabled(elem)) {
+ if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 447fb5c1d..4300d9ff4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -230,62 +230,49 @@ using NodeBlock = std::vector<Node>;
class Sampler {
public:
/// This constructor is for bound samplers
- explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_bindless{false} {}
+ constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow)
+ : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
/// This constructor is for bindless samplers
- explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
- : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
-
- /// This constructor is for serialization/deserialization
- explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
- bool is_array, bool is_shadow, bool is_bindless)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
- is_bindless{is_bindless} {}
-
- std::size_t GetOffset() const {
+ constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow)
+ : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
+ is_shadow{is_shadow}, is_bindless{true} {}
+
+ constexpr u32 GetIndex() const {
+ return index;
+ }
+
+ constexpr u32 GetOffset() const {
return offset;
}
- std::size_t GetIndex() const {
- return index;
+ constexpr u32 GetBuffer() const {
+ return buffer;
}
- Tegra::Shader::TextureType GetType() const {
+ constexpr Tegra::Shader::TextureType GetType() const {
return type;
}
- bool IsArray() const {
+ constexpr bool IsArray() const {
return is_array;
}
- bool IsShadow() const {
+ constexpr bool IsShadow() const {
return is_shadow;
}
- bool IsBindless() const {
+ constexpr bool IsBindless() const {
return is_bindless;
}
- std::pair<u32, u32> GetBindlessCBuf() const {
- return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
- }
-
- bool operator<(const Sampler& rhs) const {
- return std::tie(index, offset, type, is_array, is_shadow, is_bindless) <
- std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow,
- rhs.is_bindless);
- }
-
private:
- /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
- /// instruction.
- std::size_t offset{};
- std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
+ u32 index{}; ///< Emulated index given for the this sampler.
+ u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
+ u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
+
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
@@ -294,18 +281,13 @@ private:
class Image final {
public:
- constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
- : offset{offset}, index{index}, type{type}, is_bindless{false} {}
-
- constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::ImageType type)
- : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_bindless{true} {}
+ /// This constructor is for bound images
+ constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type)
+ : index{index}, offset{offset}, type{type} {}
- constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
- bool is_bindless, bool is_written, bool is_read, bool is_atomic)
- : offset{offset}, index{index}, type{type}, is_bindless{is_bindless},
- is_written{is_written}, is_read{is_read}, is_atomic{is_atomic} {}
+ /// This constructor is for bindless samplers
+ constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type)
+ : index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {}
void MarkWrite() {
is_written = true;
@@ -321,12 +303,16 @@ public:
is_atomic = true;
}
- constexpr std::size_t GetOffset() const {
+ constexpr u32 GetIndex() const {
+ return index;
+ }
+
+ constexpr u32 GetOffset() const {
return offset;
}
- constexpr std::size_t GetIndex() const {
- return index;
+ constexpr u32 GetBuffer() const {
+ return buffer;
}
constexpr Tegra::Shader::ImageType GetType() const {
@@ -349,18 +335,11 @@ public:
return is_atomic;
}
- constexpr std::pair<u32, u32> GetBindlessCBuf() const {
- return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
- }
-
- constexpr bool operator<(const Image& rhs) const {
- return std::tie(offset, index, type, is_bindless) <
- std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
- }
-
private:
- u64 offset{};
- std::size_t index{};
+ u32 index{};
+ u32 offset{};
+ u32 buffer{};
+
Tegra::Shader::ImageType type{};
bool is_bindless{};
bool is_written{};
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 7582999a5..26c8fde22 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,6 +5,7 @@
#pragma once
#include <array>
+#include <list>
#include <map>
#include <optional>
#include <set>
@@ -95,11 +96,11 @@ public:
return used_cbufs;
}
- const std::set<Sampler>& GetSamplers() const {
+ const std::list<Sampler>& GetSamplers() const {
return used_samplers;
}
- const std::map<u64, Image>& GetImages() const {
+ const std::list<Image>& GetImages() const {
return used_images;
}
@@ -316,9 +317,6 @@ private:
/// Access a bindless image sampler.
Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
- /// Tries to access an existing image, updating it's state as needed
- Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type);
-
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -326,7 +324,7 @@ private:
Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
- const Node4& components, bool is_tld4 = false);
+ const Node4& components);
void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components, bool ignore_mask = false);
@@ -402,8 +400,8 @@ private:
std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
- std::set<Sampler> used_samplers;
- std::map<u64, Image> used_images;
+ std::list<Sampler> used_samplers;
+ std::list<Image> used_images;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
bool uses_layer{};