diff options
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 118 | ||||
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 217 | ||||
| -rw-r--r-- | src/video_core/shader/decode/xmad.cpp | 39 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 64 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 17 |
5 files changed, 358 insertions, 97 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ea3c71eed..ea1092db1 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -18,6 +19,23 @@ using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +namespace { +u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { + switch (uniform_type) { + case Tegra::Shader::UniformType::Single: + return 1; + case Tegra::Shader::UniformType::Double: + return 2; + case Tegra::Shader::UniformType::Quad: + case Tegra::Shader::UniformType::UnsignedQuad: + return 4; + default: + UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); + return 1; + } +} +} // namespace + u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -85,8 +103,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::LD_L: { - UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", - static_cast<u32>(instr.ld_l.unknown.Value())); + LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", + static_cast<u64>(instr.ld_l.unknown.Value())); const auto GetLmem = [&](s32 offset) { ASSERT(offset % 4 == 0); @@ -126,45 +144,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::LDG: { - const u32 count = [&]() { - switch (instr.ldg.type) { - case Tegra::Shader::UniformType::Single: - return 1; - case Tegra::Shader::UniformType::Double: - return 2; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 4; - default: - UNIMPLEMENTED_MSG("Unimplemented LDG size!"); - return 1; - } - }(); - - const Node addr_register = GetRegister(instr.gpr8); - const Node base_address = - TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); - const auto cbuf = std::get_if<CbufNode>(base_address); - ASSERT(cbuf != nullptr); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); - ASSERT(cbuf_offset_imm != nullptr); - const auto cbuf_offset = cbuf_offset_imm->GetValue(); - - bb.push_back(Comment( - fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); - - const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; - used_global_memory_bases.insert(descriptor); - - const Node immediate_offset = - Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value())); - const Node base_real_address = - Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register); + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.ldg.immediate_offset.Value()), false); + const u32 count = GetUniformTypeElementsCount(instr.ldg.type); for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); SetTemporal(bb, i, gmem); @@ -174,6 +162,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::STG: { + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), + static_cast<u32>(instr.stg.immediate_offset.Value()), true); + + // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} + SetTemporal(bb, 0, real_address_base); + + const u32 count = GetUniformTypeElementsCount(instr.stg.type); + for (u32 i = 0; i < count; ++i) { + SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); + } + for (u32 i = 0; i < count; ++i) { + const Node it_offset = Immediate(i * 4); + const Node real_address = + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + + bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); + } + break; + } case OpCode::Id::ST_A: { UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, "Indirect attribute loads are not supported"); @@ -205,8 +215,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::ST_L: { - UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", - static_cast<u32>(instr.st_l.unknown.Value())); + LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", + static_cast<u64>(instr.st_l.cache_management.Value())); const auto GetLmemAddr = [&](s32 offset) { ASSERT(offset % 4 == 0); @@ -236,4 +246,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return pc; } +std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, + Node addr_register, + u32 immediate_offset, + bool is_write) { + const Node base_address{ + TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; + const auto cbuf = std::get_if<CbufNode>(base_address); + ASSERT(cbuf != nullptr); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + ASSERT(cbuf_offset_imm != nullptr); + const auto cbuf_offset = cbuf_offset_imm->GetValue(); + + bb.push_back( + Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); + + const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; + const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); + auto& usage = entry->second; + if (is_write) { + usage.is_written = true; + } else { + usage.is_read = true; + } + + const auto real_address = + Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); + + return {real_address, base_address, descriptor}; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index a99ae19bf..fa65ac9a9 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -7,7 +7,9 @@ #include <fmt/format.h> #include "common/assert.h" +#include "common/bit_field.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -38,22 +40,39 @@ static std::size_t GetCoordCount(TextureType texture_type) { u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - + bool is_bindless = false; switch (opcode->get().GetId()) { case OpCode::Id::TEX: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); } const TextureType texture_type{instr.tex.texture_type}; const bool is_array = instr.tex.array != 0; + const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); const auto process_mode = instr.tex.GetTextureProcessMode(); WriteTexInstructionFloat( - bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); + bb, instr, + GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); + break; + } + case OpCode::Id::TEX_B: { + UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + + if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); + } + + const TextureType texture_type{instr.tex_b.texture_type}; + const bool is_array = instr.tex_b.array != 0; + const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); + const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.tex_b.GetTextureProcessMode(); + WriteTexInstructionFloat(bb, instr, + GetTexCode(instr, texture_type, process_mode, depth_compare, + is_array, is_aoffi, {instr.gpr20})); break; } case OpCode::Id::TEXS: { @@ -78,8 +97,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } case OpCode::Id::TLD4: { ASSERT(instr.tld4.array == 0); - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), "NDV is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), @@ -92,8 +109,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto texture_type = instr.tld4.texture_type.Value(); const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); const bool is_array = instr.tld4.array != 0; - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array)); + const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); + WriteTexInstructionFloat( + bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi)); break; } case OpCode::Id::TLD4S: { @@ -127,13 +145,16 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; + MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } WriteTexsInstructionFloat(bb, instr, values); break; } + case OpCode::Id::TXQ_B: + is_bindless = true; + [[fallthrough]]; case OpCode::Id::TXQ: { if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); @@ -143,7 +164,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const auto& sampler = - GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + is_bindless + ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, + false) + : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); u32 indexer = 0; switch (instr.txq.query_type) { @@ -152,9 +176,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { if (!instr.txq.IsComponentEnabled(element)) { continue; } - MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; + MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); + Operation(OperationCode::TextureQueryDimensions, meta, + GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); SetTemporal(bb, indexer++, value); } for (u32 i = 0; i < indexer; ++i) { @@ -168,6 +193,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::TMML_B: + is_bindless = true; + [[fallthrough]]; case OpCode::Id::TMML: { UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), "NDV is not implemented"); @@ -178,7 +206,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + const auto& sampler = is_bindless + ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) + : GetSampler(instr.sampler, texture_type, is_array, false); std::vector<Node> coords; @@ -199,17 +229,19 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { coords.push_back(GetRegister(instr.gpr8.Value() + 1)); texture_type = TextureType::Texture2D; } - + u32 indexer = 0; for (u32 element = 0; element < 2; ++element) { + if (!instr.tmml.IsComponentEnabled(element)) { + continue; + } auto params = coords; - MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; + MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); - SetTemporal(bb, element, value); + SetTemporal(bb, indexer++, value); } - for (u32 element = 0; element < 2; ++element) { - SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); } - break; } case OpCode::Id::TLDS: { @@ -254,6 +286,34 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu return *used_samplers.emplace(entry).first; } +const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, + bool is_array, bool is_shadow) { + const Node sampler_register = GetRegister(reg); + const Node base_sampler = + TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); + const auto cbuf = std::get_if<CbufNode>(base_sampler); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + ASSERT(cbuf_offset_imm != nullptr); + const auto cbuf_offset = cbuf_offset_imm->GetValue(); + const auto cbuf_index = cbuf->GetIndex(); + const u64 cbuf_key = (cbuf_index << 32) | cbuf_offset; + + // If this sampler has already been used, return the existing mapping. + const auto itr = + std::find_if(used_samplers.begin(), used_samplers.end(), + [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; }); + if (itr != used_samplers.end()) { + ASSERT(itr->GetType() == type && itr->IsArray() == is_array && + itr->IsShadow() == is_shadow); + return *itr; + } + + // Otherwise create a new mapping for this sampler + const std::size_t next_index = used_samplers.size(); + const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow}; + return *used_samplers.emplace(entry).first; +} + void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { u32 dest_elem = 0; for (u32 elem = 0; elem < 4; ++elem) { @@ -325,22 +385,28 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset) { + Node array, Node depth_compare, u32 bias_offset, + std::vector<Node> aoffi, + std::optional<Tegra::Shader::Register> bindless_reg) { const bool is_array = array; const bool is_shadow = depth_compare; + const bool is_bindless = bindless_reg.has_value(); UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || (texture_type == TextureType::TextureCube && is_array && is_shadow), "This method is not supported."); - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); + const auto& sampler = is_bindless + ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) + : GetSampler(instr.sampler, texture_type, is_array, is_shadow); const bool lod_needed = process_mode == TextureProcessMode::LZ || process_mode == TextureProcessMode::LL || process_mode == TextureProcessMode::LLA; - // LOD selection (either via bias or explicit textureLod) not supported in GL for - // sampler2DArrayShadow and samplerCubeArrayShadow. + // LOD selection (either via bias or explicit textureLod) not + // supported in GL for sampler2DArrayShadow and + // samplerCubeArrayShadow. const bool gl_lod_supported = !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); @@ -358,8 +424,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, lod = Immediate(0.0f); break; case TextureProcessMode::LB: - // If present, lod or bias are always stored in the register indexed by the gpr20 - // field with an offset depending on the usage of the other registers + // If present, lod or bias are always stored in the register + // indexed by the gpr20 field with an offset depending on the + // usage of the other registers bias = GetRegister(instr.gpr20.Value() + bias_offset); break; case TextureProcessMode::LL: @@ -374,7 +441,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto copy_coords = coords; - MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; + MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element}; values[element] = Operation(read_method, meta, std::move(copy_coords)); } @@ -382,9 +449,22 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, } Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); + TextureProcessMode process_mode, bool depth_compare, bool is_array, + bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { + const bool lod_bias_enabled{ + (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; + + const bool is_bindless = bindless_reg.has_value(); + + u64 parameter_register = instr.gpr20.Value(); + if (is_bindless) { + ++parameter_register; + } + + const u32 bias_lod_offset = (is_bindless ? 1 : 0); + if (lod_bias_enabled) { + ++parameter_register; + } const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); @@ -404,15 +484,20 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, const Node array = is_array ? GetRegister(array_register) : nullptr; + std::vector<Node> aoffi; + if (is_aoffi) { + aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); + } + Node dc{}; if (depth_compare) { // Depth is always stored in the register signaled by gpr20 or in the next register if lod // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - dc = GetRegister(depth_register); + dc = GetRegister(parameter_register++); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, + aoffi, bindless_reg); } Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, @@ -448,11 +533,12 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, dc = GetRegister(depth_register); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, + {}); } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array) { + bool is_array, bool is_aoffi) { const std::size_t coord_count = GetCoordCount(texture_type); const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); @@ -463,15 +549,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de const u64 coord_register = array_register + (is_array ? 1 : 0); std::vector<Node> coords; - for (size_t i = 0; i < coord_count; ++i) + for (std::size_t i = 0; i < coord_count; ++i) { coords.push_back(GetRegister(coord_register + i)); + } + + u64 parameter_register = instr.gpr20.Value(); + std::vector<Node> aoffi; + if (is_aoffi) { + aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); + } + + Node dc{}; + if (depth_compare) { + dc = GetRegister(parameter_register++); + } const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; + MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -507,7 +605,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; @@ -531,4 +629,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( return {coord_count, total_coord_count}; } -} // namespace VideoCommon::Shader
\ No newline at end of file +std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, + bool is_tld4) { + const auto [coord_offsets, size, wrap_value, + diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { + if (is_tld4) { + return {{0, 8, 16}, 6, 32, 64}; + } else { + return {{0, 4, 8}, 4, 8, 16}; + } + }(); + const u32 mask = (1U << size) - 1; + + std::vector<Node> aoffi; + aoffi.reserve(coord_count); + + const auto aoffi_immediate{ + TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; + if (!aoffi_immediate) { + // Variable access, not supported on AMD. + LOG_WARNING(HW_GPU, + "AOFFI constant folding failed, some hardware might have graphical issues"); + for (std::size_t coord = 0; coord < coord_count; ++coord) { + const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); + const Node condition = + Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); + const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); + aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); + } + return aoffi; + } + + for (std::size_t coord = 0; coord < coord_count; ++coord) { + s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; + if (value >= wrap_value) { + value -= diff_value; + } + aoffi.push_back(Immediate(value)); + } + return aoffi; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index c34843307..db15c0718 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -29,39 +29,55 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { const bool is_signed_b = instr.xmad.sign_b == 1; const bool is_signed_c = is_signed_a; - auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> { + auto [is_merge, is_psl, is_high_b, mode, op_b, + op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::XMAD_CR: return {instr.xmad.merge_56, + instr.xmad.product_shift_left_second, + instr.xmad.high_b, + instr.xmad.mode_cbf, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetRegister(instr.gpr39)}; case OpCode::Id::XMAD_RR: - return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; + return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, + instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; case OpCode::Id::XMAD_RC: - return {false, GetRegister(instr.gpr39), + return {false, + false, + instr.xmad.high_b, + instr.xmad.mode_cbf, + GetRegister(instr.gpr39), GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::XMAD_IMM: - return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)), + return {instr.xmad.merge_37, + instr.xmad.product_shift_left, + false, + instr.xmad.mode, + Immediate(static_cast<u32>(instr.xmad.imm20_16)), GetRegister(instr.gpr39)}; } UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); - return {false, Immediate(0), Immediate(0)}; + return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; }(); op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16); const Node original_b = op_b; - op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16); + op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16); // TODO(Rodrigo): Use an appropiate sign for this operation Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); - if (instr.xmad.product_shift_left) { + if (is_psl) { product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); } + SetTemporal(bb, 0, product); + product = GetTemporal(0); const Node original_c = op_c; + const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error op_c = [&]() { - switch (instr.xmad.mode) { + switch (set_mode) { case Tegra::Shader::XmadMode::None: return original_c; case Tegra::Shader::XmadMode::CLo: @@ -80,8 +96,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { } }(); + SetTemporal(bb, 1, op_c); + op_c = GetTemporal(1); + // TODO(Rodrigo): Use an appropiate sign for this operation Node sum = Operation(OperationCode::IAdd, product, op_c); + SetTemporal(bb, 2, sum); + sum = GetTemporal(2); if (is_merge) { const Node a = BitfieldExtract(sum, 0, 16); const Node b = @@ -95,4 +116,4 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { return pc; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 5bc3a3900..57af8b10f 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -7,6 +7,7 @@ #include <array> #include <cstring> #include <map> +#include <optional> #include <set> #include <string> #include <tuple> @@ -195,9 +196,23 @@ enum class ExitMethod { class Sampler { public: + // Use this constructor for bounded Samplers explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, bool is_array, bool is_shadow) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{false} {} + + // Use this constructor for bindless Samplers + explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::TextureType type, bool is_array, bool is_shadow) + : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, + is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} + + // Use this only for serialization/deserialization + explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow, bool is_bindless) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{is_bindless} {} std::size_t GetOffset() const { return offset; @@ -219,6 +234,14 @@ public: return is_shadow; } + bool IsBindless() const { + return is_bindless; + } + + std::pair<u32, u32> GetBindlessCBuf() const { + return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; + } + bool operator<(const Sampler& rhs) const { return std::tie(offset, index, type, is_array, is_shadow) < std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow); @@ -230,8 +253,9 @@ private: std::size_t offset{}; std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. }; class ConstBuffer { @@ -275,6 +299,11 @@ struct GlobalMemoryBase { } }; +struct GlobalMemoryUsage { + bool is_read{}; + bool is_written{}; +}; + struct MetaArithmetic { bool precise{}; }; @@ -290,6 +319,7 @@ struct MetaTexture { const Sampler& sampler; Node array{}; Node depth_compare{}; + std::vector<Node> aoffi; Node bias{}; Node lod{}; Node component{}; @@ -576,8 +606,8 @@ public: return used_clip_distances; } - const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const { - return used_global_memory_bases; + const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { + return used_global_memory; } std::size_t GetLength() const { @@ -728,6 +758,11 @@ private: const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, bool is_shadow); + // Accesses a texture sampler for a bindless texture. + const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, + Tegra::Shader::TextureType type, bool is_array, + bool is_shadow); + /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); @@ -741,14 +776,15 @@ private: Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array); + bool is_array, bool is_aoffi, + std::optional<Tegra::Shader::Register> bindless_reg); Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, bool is_array); Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array); + bool depth_compare, bool is_array, bool is_aoffi); Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, bool is_array); @@ -757,9 +793,12 @@ private: Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); + std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); + Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset); + Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, + std::optional<Tegra::Shader::Register> bindless_reg); Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); @@ -773,8 +812,15 @@ private: Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); + std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor); + std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); + std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, + Node addr_register, + u32 immediate_offset, + bool is_write); + template <typename... T> Node Operation(OperationCode code, const T*... operands) { return StoreNode(OperationNode(code, operands...)); @@ -828,7 +874,7 @@ private: std::map<u32, ConstBuffer> used_cbufs; std::set<Sampler> used_samplers; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; - std::set<GlobalMemoryBase> used_global_memory_bases; + std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; Tegra::Shader::Header header; }; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 33b071747..4505667ff 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -6,6 +6,7 @@ #include <utility> #include <variant> +#include "common/common_types.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -14,7 +15,7 @@ namespace { std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, OperationCode operation_code) { for (; cursor >= 0; --cursor) { - const Node node = code[cursor]; + const Node node = code.at(cursor); if (const auto operation = std::get_if<OperationNode>(node)) { if (operation->GetCode() == operation_code) return {node, cursor}; @@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { return nullptr; } +std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) { + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register + // that it uses as operand + const auto [found, found_cursor] = + TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + if (!found) { + return {}; + } + if (const auto immediate = std::get_if<ImmediateNode>(found)) { + return immediate->GetValue(); + } + return {}; +} + std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor) { for (; cursor >= 0; --cursor) { |
