diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 1 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 22 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 6 | ||||
| -rw-r--r-- | src/video_core/macro_interpreter.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/rasterizer_cache.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 14 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 313 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 54 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 28 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 30 | ||||
| -rw-r--r-- | src/video_core/shader/decode/other.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 20 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 86 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 12 |
17 files changed, 398 insertions, 231 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 036e66f05..3175579cc 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -40,6 +40,13 @@ bool DmaPusher::Step() { } const CommandList& command_list{dma_pushbuffer.front()}; + ASSERT_OR_EXECUTE(!command_list.empty(), { + // Somehow the command_list is empty, in order to avoid a crash + // We ignore it and assume its size is 0. + dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + return true; + }); const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; GPUVAddr dma_get = command_list_header.addr; GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 48e4fec33..f342c78e6 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -59,6 +59,7 @@ public: static constexpr std::size_t NumCBData = 16; static constexpr std::size_t NumVertexArrays = 32; static constexpr std::size_t NumVertexAttributes = 32; + static constexpr std::size_t NumVaryings = 31; static constexpr std::size_t NumTextureSamplers = 32; static constexpr std::size_t NumClipDistances = 8; static constexpr std::size_t MaxShaderProgram = 6; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index e5b4eadea..7bbc556da 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -98,6 +98,10 @@ union Attribute { BitField<22, 2, u64> element; BitField<24, 6, Index> index; BitField<47, 3, AttributeSize> size; + + bool IsPhysical() const { + return element == 0 && static_cast<u64>(index.Value()) == 0; + } } fmt20; union { @@ -499,6 +503,11 @@ enum class SystemVariable : u64 { CircularQueueEntryAddressHigh = 0x63, }; +enum class PhysicalAttributeDirection : u64 { + Input = 0, + Output = 1, +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -587,6 +596,7 @@ union Instruction { } alu; union { + BitField<38, 1, u64> idx; BitField<51, 1, u64> saturate; BitField<52, 2, IpaSampleMode> sample_mode; BitField<54, 2, IpaInterpMode> interp_mode; @@ -812,6 +822,12 @@ union Instruction { } stg; union { + BitField<32, 1, PhysicalAttributeDirection> direction; + BitField<47, 3, AttributeSize> size; + BitField<20, 11, u64> address; + } al2p; + + union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; BitField<7, 1, u64> abs_a; @@ -1374,8 +1390,9 @@ public: ST_A, ST_L, ST_S, - LDG, // Load from global memory - STG, // Store in global memory + LDG, // Load from global memory + STG, // Store in global memory + AL2P, // Transforms attribute memory into physical memory TEX, TEX_B, // Texture Load Bindless TXQ, // Texture Query @@ -1646,6 +1663,7 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index cdf86f562..05a168a72 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -81,12 +81,6 @@ struct CommandDataContainer { CommandDataContainer(CommandData&& data, u64 next_fence) : data{std::move(data)}, fence{next_fence} {} - CommandDataContainer& operator=(const CommandDataContainer& t) { - data = std::move(t.data); - fence = t.fence; - return *this; - } - CommandData data; u64 fence{}; }; diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index fbea107ca..c766ed692 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -120,7 +120,9 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { // An instruction with the Exit flag will not actually // cause an exit if it's executed inside a delay slot. - if (opcode.is_exit && !is_delay_slot) { + // TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further + // testing on the MME code. + if (opcode.is_exit) { // Exit has a delay slot, execute the next instruction Step(offset, true); return false; diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index f820f3ed9..0c4ea1494 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -144,8 +144,9 @@ protected: object->SetIsRegistered(false); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); + const CacheAddr addr = object->GetCacheAddr(); interval_cache.subtract({GetInterval(object), ObjectSet{object}}); - map_cache.erase(object->GetCacheAddr()); + map_cache.erase(addr); } /// Returns a ticks counter used for tracking when cached objects were last modified diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b6d9e0ddb..38497678a 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -21,9 +21,18 @@ T GetInteger(GLenum pname) { Device::Device() { uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); + max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); + max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); } +Device::Device(std::nullptr_t) { + uniform_buffer_alignment = 0; + max_vertex_attributes = 16; + max_varyings = 15; + has_variable_aoffi = true; +} + bool Device::TestVariableAoffi() { const GLchar* AOFFI_TEST = R"(#version 430 core uniform sampler2D tex; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 78ff5ee58..de8490682 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -5,17 +5,27 @@ #pragma once #include <cstddef> +#include "common/common_types.h" namespace OpenGL { class Device { public: - Device(); + explicit Device(); + explicit Device(std::nullptr_t); std::size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } + u32 GetMaxVertexAttributes() const { + return max_vertex_attributes; + } + + u32 GetMaxVaryings() const { + return max_varyings; + } + bool HasVariableAoffi() const { return has_variable_aoffi; } @@ -24,6 +34,8 @@ private: static bool TestVariableAoffi(); std::size_t uniform_buffer_alignment{}; + u32 max_vertex_attributes{}; + u32 max_varyings{}; bool has_variable_aoffi{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 1a62795e1..4bff54a59 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -134,6 +134,19 @@ bool IsPrecise(Node node) { return false; } +constexpr bool IsGenericAttribute(Attribute::Index index) { + return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; +} + +constexpr Attribute::Index ToGenericAttribute(u32 value) { + return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0)); +} + +u32 GetGenericAttributeIndex(Attribute::Index index) { + ASSERT(IsGenericAttribute(index)); + return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); +} + class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, @@ -152,6 +165,7 @@ public: DeclareConstantBuffers(); DeclareGlobalMemory(); DeclareSamplers(); + DeclarePhysicalAttributeReader(); code.AddLine("void execute_" + suffix + "() {"); ++code.scope; @@ -296,76 +310,95 @@ private: } std::string GetInputFlags(AttributeUse attribute) { - std::string out; - switch (attribute) { - case AttributeUse::Constant: - out += "flat "; - break; - case AttributeUse::ScreenLinear: - out += "noperspective "; - break; case AttributeUse::Perspective: // Default, Smooth - break; + return {}; + case AttributeUse::Constant: + return "flat "; + case AttributeUse::ScreenLinear: + return "noperspective "; default: - LOG_CRITICAL(HW_GPU, "Unused attribute being fetched"); - UNREACHABLE(); + case AttributeUse::Unused: + UNREACHABLE_MSG("Unused attribute being fetched"); + return {}; + UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute)); + return {}; } - return out; } void DeclareInputAttributes() { - const auto& attributes = ir.GetInputAttributes(); - for (const auto element : attributes) { - const Attribute::Index index = element.first; - if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { - // Skip when it's not a generic attribute - continue; - } - - // TODO(bunnei): Use proper number of elements for these - u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); - if (stage != ShaderStage::Vertex) { - // If inputs are varyings, add an offset - idx += GENERIC_VARYING_START_LOCATION; + if (ir.HasPhysicalAttributes()) { + const u32 num_inputs{GetNumPhysicalInputAttributes()}; + for (u32 i = 0; i < num_inputs; ++i) { + DeclareInputAttribute(ToGenericAttribute(i), true); } + code.AddNewLine(); + return; + } - std::string attr = GetInputAttribute(index); - if (stage == ShaderStage::Geometry) { - attr = "gs_" + attr + "[]"; - } - std::string suffix; - if (stage == ShaderStage::Fragment) { - const auto input_mode = - header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION); - suffix = GetInputFlags(input_mode); + const auto& attributes = ir.GetInputAttributes(); + for (const auto index : attributes) { + if (IsGenericAttribute(index)) { + DeclareInputAttribute(index, false); } - code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " + - attr + ';'); } if (!attributes.empty()) code.AddNewLine(); } + void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { + const u32 generic_index{GetGenericAttributeIndex(index)}; + + std::string name{GetInputAttribute(index)}; + if (stage == ShaderStage::Geometry) { + name = "gs_" + name + "[]"; + } + + std::string suffix; + if (stage == ShaderStage::Fragment) { + const auto input_mode{header.ps.GetAttributeUse(generic_index)}; + if (skip_unused && input_mode == AttributeUse::Unused) { + return; + } + suffix = GetInputFlags(input_mode); + } + + u32 location = generic_index; + if (stage != ShaderStage::Vertex) { + // If inputs are varyings, add an offset + location += GENERIC_VARYING_START_LOCATION; + } + + code.AddLine("layout (location = " + std::to_string(location) + ") " + suffix + "in vec4 " + + name + ';'); + } + void DeclareOutputAttributes() { + if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { + for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { + DeclareOutputAttribute(ToGenericAttribute(i)); + } + code.AddNewLine(); + return; + } + const auto& attributes = ir.GetOutputAttributes(); for (const auto index : attributes) { - if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { - // Skip when it's not a generic attribute - continue; + if (IsGenericAttribute(index)) { + DeclareOutputAttribute(index); } - // TODO(bunnei): Use proper number of elements for these - const auto idx = static_cast<u32>(index) - - static_cast<u32>(Attribute::Index::Attribute_0) + - GENERIC_VARYING_START_LOCATION; - code.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " + - GetOutputAttribute(index) + ';'); } if (!attributes.empty()) code.AddNewLine(); } + void DeclareOutputAttribute(Attribute::Index index) { + const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION}; + code.AddLine("layout (location = " + std::to_string(location) + ") out vec4 " + + GetOutputAttribute(index) + ';'); + } + void DeclareConstantBuffers() { for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; @@ -429,6 +462,39 @@ private: code.AddNewLine(); } + void DeclarePhysicalAttributeReader() { + if (!ir.HasPhysicalAttributes()) { + return; + } + code.AddLine("float readPhysicalAttribute(uint physical_address) {"); + ++code.scope; + code.AddLine("switch (physical_address) {"); + + // Just declare generic attributes for now. + const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())}; + for (u32 index = 0; index < num_attributes; ++index) { + const auto attribute{ToGenericAttribute(index)}; + for (u32 element = 0; element < 4; ++element) { + constexpr u32 generic_base{0x80}; + constexpr u32 generic_stride{16}; + constexpr u32 element_stride{4}; + const u32 address{generic_base + index * generic_stride + element * element_stride}; + + const bool declared{stage != ShaderStage::Fragment || + header.ps.GetAttributeUse(index) != AttributeUse::Unused}; + const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; + code.AddLine(fmt::format("case 0x{:x}: return {};", address, value)); + } + } + + code.AddLine("default: return 0;"); + + code.AddLine('}'); + --code.scope; + code.AddLine('}'); + code.AddNewLine(); + } + void VisitBlock(const NodeBlock& bb) { for (const Node node : bb) { if (const std::string expr = Visit(node); !expr.empty()) { @@ -483,70 +549,12 @@ private: return value; } else if (const auto abuf = std::get_if<AbufNode>(node)) { - const auto attribute = abuf->GetIndex(); - const auto element = abuf->GetElement(); - - const auto GeometryPass = [&](const std::string& name) { - if (stage == ShaderStage::Geometry && abuf->GetBuffer()) { - // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games - // set an 0x80000000 index for those and the shader fails to build. Find out why - // this happens and what's its intent. - return "gs_" + name + "[ftou(" + Visit(abuf->GetBuffer()) + - ") % MAX_VERTEX_INPUT]"; - } - return name; - }; - - switch (attribute) { - case Attribute::Index::Position: - if (stage != ShaderStage::Fragment) { - return GeometryPass("position") + GetSwizzle(element); - } else { - return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element); - } - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return "gl_PointCoord.x"; - case 1: - return "gl_PointCoord.y"; - case 2: - case 3: - return "0"; - } - UNREACHABLE(); - return "0"; - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - ASSERT(stage == ShaderStage::Vertex); - switch (element) { - case 2: - // Config pack's first value is instance_id. - return "uintBitsToFloat(config_pack[0])"; - case 3: - return "uintBitsToFloat(gl_VertexID)"; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return "0"; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderStage::Fragment); - switch (element) { - case 3: - return "itof(gl_FrontFacing ? -1 : 0)"; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return "0"; - default: - if (attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31) { - return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element); - } - break; + UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, + "Physical attributes in geometry shaders are not implemented"); + if (abuf->IsPhysicalBuffer()) { + return "readPhysicalAttribute(ftou(" + Visit(abuf->GetPhysicalAddress()) + "))"; } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); + return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); } else if (const auto cbuf = std::get_if<CbufNode>(node)) { const Node offset = cbuf->GetOffset(); @@ -598,6 +606,69 @@ private: return {}; } + std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) { + const auto GeometryPass = [&](std::string name) { + if (stage == ShaderStage::Geometry && buffer) { + // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games + // set an 0x80000000 index for those and the shader fails to build. Find out why + // this happens and what's its intent. + return "gs_" + std::move(name) + "[ftou(" + Visit(buffer) + ") % MAX_VERTEX_INPUT]"; + } + return name; + }; + + switch (attribute) { + case Attribute::Index::Position: + if (stage != ShaderStage::Fragment) { + return GeometryPass("position") + GetSwizzle(element); + } else { + return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element); + } + case Attribute::Index::PointCoord: + switch (element) { + case 0: + return "gl_PointCoord.x"; + case 1: + return "gl_PointCoord.y"; + case 2: + case 3: + return "0"; + } + UNREACHABLE(); + return "0"; + case Attribute::Index::TessCoordInstanceIDVertexID: + // TODO(Subv): Find out what the values are for the first two elements when inside a + // vertex shader, and what's the value of the fourth element when inside a Tess Eval + // shader. + ASSERT(stage == ShaderStage::Vertex); + switch (element) { + case 2: + // Config pack's first value is instance_id. + return "uintBitsToFloat(config_pack[0])"; + case 3: + return "uintBitsToFloat(gl_VertexID)"; + } + UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); + return "0"; + case Attribute::Index::FrontFacing: + // TODO(Subv): Find out what the values are for the other elements. + ASSERT(stage == ShaderStage::Fragment); + switch (element) { + case 3: + return "itof(gl_FrontFacing ? -1 : 0)"; + } + UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); + return "0"; + default: + if (IsGenericAttribute(attribute)) { + return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element); + } + break; + } + UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); + return "0"; + } + std::string ApplyPrecise(Operation operation, const std::string& value) { if (!IsPrecise(operation)) { return value; @@ -833,6 +904,8 @@ private: target = GetRegister(gpr->GetIndex()); } else if (const auto abuf = std::get_if<AbufNode>(dest)) { + UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); + target = [&]() -> std::string { switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { case Attribute::Index::Position: @@ -844,8 +917,7 @@ private: case Attribute::Index::ClipDistances4567: return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']'; default: - if (attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31) { + if (IsGenericAttribute(attribute)) { return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()); } UNIMPLEMENTED_MSG("Unhandled output attribute: {}", @@ -1591,15 +1663,11 @@ private: } std::string GetInputAttribute(Attribute::Index attribute) const { - const auto index{static_cast<u32>(attribute) - - static_cast<u32>(Attribute::Index::Attribute_0)}; - return GetDeclarationWithSuffix(index, "input_attr"); + return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); } std::string GetOutputAttribute(Attribute::Index attribute) const { - const auto index{static_cast<u32>(attribute) - - static_cast<u32>(Attribute::Index::Attribute_0)}; - return GetDeclarationWithSuffix(index, "output_attr"); + return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); } std::string GetConstBuffer(u32 index) const { @@ -1640,6 +1708,19 @@ private: return name + '_' + std::to_string(index) + '_' + suffix; } + u32 GetNumPhysicalInputAttributes() const { + return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); + } + + u32 GetNumPhysicalAttributes() const { + return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); + } + + u32 GetNumPhysicalVaryings() const { + return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION, + Maxwell::NumVaryings); + } + const Device& device; const ShaderIR& ir; const ShaderStage stage; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 254c0d499..fba9c594a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -104,8 +104,9 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { return true; } -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) - : system{system}, precompiled_cache_virtual_file_offset{0} {} +ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} + +ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> ShaderDiskCacheOpenGL::LoadTransferable() { @@ -243,7 +244,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - const auto entry = LoadDecompiledEntry(); + auto entry = LoadDecompiledEntry(); if (!entry) { return {}; } @@ -287,13 +288,13 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn return {}; } - std::vector<u8> code(code_size); + std::string code(code_size, '\0'); if (!LoadArrayFromPrecompiled(code.data(), code.size())) { return {}; } ShaderDiskCacheDecompiled entry; - entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size); + entry.code = std::move(code); u32 const_buffers_count{}; if (!LoadObjectFromPrecompiled(const_buffers_count)) { @@ -303,12 +304,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn for (u32 i = 0; i < const_buffers_count; ++i) { u32 max_offset{}; u32 index{}; - u8 is_indirect{}; + bool is_indirect{}; if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) || !LoadObjectFromPrecompiled(is_indirect)) { return {}; } - entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index); + entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); } u32 samplers_count{}; @@ -320,18 +321,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn u64 offset{}; u64 index{}; u32 type{}; - u8 is_array{}; - u8 is_shadow{}; - u8 is_bindless{}; + bool is_array{}; + bool is_shadow{}; + bool is_bindless{}; if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) || !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) { return {}; } - entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset), - static_cast<std::size_t>(index), - static_cast<Tegra::Shader::TextureType>(type), - is_array != 0, is_shadow != 0, is_bindless != 0); + entry.entries.samplers.emplace_back( + static_cast<std::size_t>(offset), static_cast<std::size_t>(index), + static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); } u32 global_memory_count{}; @@ -342,21 +342,20 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn for (u32 i = 0; i < global_memory_count; ++i) { u32 cbuf_index{}; u32 cbuf_offset{}; - u8 is_read{}; - u8 is_written{}; + bool is_read{}; + bool is_written{}; if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) || !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { return {}; } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0, - is_written != 0); + entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read, + is_written); } for (auto& clip_distance : entry.entries.clip_distances) { - u8 clip_distance_raw{}; - if (!LoadObjectFromPrecompiled(clip_distance_raw)) + if (!LoadObjectFromPrecompiled(clip_distance)) { return {}; - clip_distance = clip_distance_raw != 0; + } } u64 shader_length{}; @@ -384,7 +383,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: for (const auto& cbuf : entries.const_buffers) { if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) || !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) || - !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) { + !SaveObjectToPrecompiled(cbuf.IsIndirect())) { return false; } } @@ -396,9 +395,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) || !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) || !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) || - !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) { + !SaveObjectToPrecompiled(sampler.IsArray()) || + !SaveObjectToPrecompiled(sampler.IsShadow()) || + !SaveObjectToPrecompiled(sampler.IsBindless())) { return false; } } @@ -409,14 +408,13 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: for (const auto& gmem : entries.global_memory_entries) { if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) || !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) || - !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) { + !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) { return false; } } for (const bool clip_distance : entries.clip_distances) { - if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) { + if (!SaveObjectToPrecompiled(clip_distance)) { return false; } } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 0142b2e3b..2da0a4a23 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -70,14 +70,14 @@ namespace std { template <> struct hash<OpenGL::BaseBindings> { - std::size_t operator()(const OpenGL::BaseBindings& bindings) const { + std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16; } }; template <> struct hash<OpenGL::ShaderDiskCacheUsage> { - std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const { + std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { return static_cast<std::size_t>(usage.unique_identifier) ^ std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16; } @@ -162,6 +162,7 @@ struct ShaderDiskCacheDump { class ShaderDiskCacheOpenGL { public: explicit ShaderDiskCacheOpenGL(Core::System& system); + ~ShaderDiskCacheOpenGL(); /// Loads transferable cache. If file has a old version or on failure, it deletes the file. std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> @@ -259,20 +260,35 @@ private: return SaveArrayToPrecompiled(&object, 1); } + bool SaveObjectToPrecompiled(bool object) { + const auto value = static_cast<u8>(object); + return SaveArrayToPrecompiled(&value, 1); + } + template <typename T> bool LoadObjectFromPrecompiled(T& object) { return LoadArrayFromPrecompiled(&object, 1); } - // Copre system + bool LoadObjectFromPrecompiled(bool& object) { + u8 value; + const bool read_ok = LoadArrayFromPrecompiled(&value, 1); + if (!read_ok) { + return false; + } + + object = value != 0; + return true; + } + + // Core system Core::System& system; // Stored transferable shaders std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; - // Stores whole precompiled cache which will be read from or saved to the precompiled chache - // file + // Stores whole precompiled cache which will be read from/saved to the precompiled cache file FileSys::VectorVfsFile precompiled_cache_virtual_file; // Stores the current offset of the precompiled cache file for IO purposes - std::size_t precompiled_cache_virtual_file_offset; + std::size_t precompiled_cache_virtual_file_offset = 0; // The cache has been loaded at boot bool tried_to_load{}; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a11000f6b..b61a6d170 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -194,8 +194,8 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } - for (const auto& attr : ir.GetInputAttributes()) { - entries.attributes.insert(GetGenericAttributeLocation(attr.first)); + for (const auto& attribute : ir.GetInputAttributes()) { + entries.attributes.insert(GetGenericAttributeLocation(attribute)); } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -321,8 +321,7 @@ private: } void DeclareInputAttributes() { - for (const auto element : ir.GetInputAttributes()) { - const Attribute::Index index = element.first; + for (const auto index : ir.GetInputAttributes()) { if (!IsGenericAttribute(index)) { continue; } diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ea1092db1..6a992c543 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -12,6 +12,8 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" +#pragma optimize("", off) + namespace VideoCommon::Shader { using Tegra::Shader::Attribute; @@ -47,17 +49,20 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { "Indirect attribute loads are not supported"); UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, "Unaligned attribute loads are not supported"); + UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && + instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, + "Non-32 bits PHYS reads are not implemented"); - Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass, - Tegra::Shader::IpaSampleMode::Default}; + const Node buffer{GetRegister(instr.gpr39)}; u64 next_element = instr.attribute.fmt20.element; auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); const auto LoadNextElement = [&](u32 reg_offset) { - const Node buffer = GetRegister(instr.gpr39); - const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index), - next_element, input_mode, buffer); + const Node attribute{instr.attribute.fmt20.IsPhysical() + ? GetPhysicalInputAttribute(instr.gpr8, buffer) + : GetInputAttribute(static_cast<Attribute::Index>(next_index), + next_element, buffer)}; SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); @@ -239,6 +244,21 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::AL2P: { + // Ignore al2p.direction since we don't care about it. + + // Calculate emulation fake physical address. + const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))}; + const Node reg{GetRegister(instr.gpr8)}; + const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; + + // Set the fake address to target register. + SetRegister(bb, instr.gpr0, fake_address); + + // Signal the shader IR to declare all possible attributes and varyings + uses_physical_attributes = true; + break; + } default: UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); } diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d750a2936..fa17c45b5 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -130,15 +130,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::IPA: { - const auto& attribute = instr.attribute.fmt28; + const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; + + const auto attribute = instr.attribute.fmt28; const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), instr.ipa.sample_mode.Value()}; - const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); - Node value = attr; + Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) + : GetInputAttribute(attribute.index, attribute.element); const Tegra::Shader::Attribute::Index index = attribute.index.Value(); - if (index >= Tegra::Shader::Attribute::Index::Attribute_0 && - index <= Tegra::Shader::Attribute::Index::Attribute_31) { + const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 && + index <= Tegra::Shader::Attribute::Index::Attribute_31; + if (is_generic || is_physical) { // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. // In theory by setting them as perspective, OpenGL does the perspective correction. // A way must figured to reverse the last step of it. diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index e4eb0dfd9..153ad1fd0 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -21,6 +21,13 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) + : program_code{program_code}, main_offset{main_offset} { + Decode(); +} + +ShaderIR::~ShaderIR() = default; + Node ShaderIR::StoreNode(NodeData&& node_data) { auto store = std::make_unique<NodeData>(node_data); const Node node = store.get(); @@ -89,13 +96,14 @@ Node ShaderIR::GetPredicate(bool immediate) { return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute)); } -Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, - const Tegra::Shader::IpaMode& input_mode, Node buffer) { - const auto [entry, is_new] = - used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{})); - entry->second.insert(input_mode); +Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { + used_input_attributes.emplace(index); + return StoreNode(AbufNode(index, static_cast<u32>(element), buffer)); +} - return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer)); +Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { + uses_physical_attributes = true; + return StoreNode(AbufNode(GetRegister(physical_address), buffer)); } Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 65f1e1de9..0bf124252 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -328,40 +328,31 @@ struct MetaTexture { u32 element{}; }; -inline constexpr MetaArithmetic PRECISE = {true}; -inline constexpr MetaArithmetic NO_PRECISE = {false}; +constexpr MetaArithmetic PRECISE = {true}; +constexpr MetaArithmetic NO_PRECISE = {false}; using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; /// Holds any kind of operation that can be done in the IR class OperationNode final { public: - template <typename... T> - explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {} + explicit OperationNode(OperationCode code) : code{code} {} - template <typename... T> - explicit constexpr OperationNode(OperationCode code, Meta&& meta) - : code{code}, meta{std::move(meta)} {} + explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {} template <typename... T> - explicit constexpr OperationNode(OperationCode code, const T*... operands) + explicit OperationNode(OperationCode code, const T*... operands) : OperationNode(code, {}, operands...) {} template <typename... T> - explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_) - : code{code}, meta{std::move(meta)} { - - auto operands_list = {operands_...}; - for (auto& operand : operands_list) { - operands.push_back(operand); - } - } + explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_) + : code{code}, meta{std::move(meta)}, operands{operands_...} {} explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands) : code{code}, meta{meta}, operands{std::move(operands)} {} explicit OperationNode(OperationCode code, std::vector<Node>&& operands) - : code{code}, meta{}, operands{std::move(operands)} {} + : code{code}, operands{std::move(operands)} {} OperationCode GetCode() const { return code; @@ -465,17 +456,14 @@ private: /// Attribute buffer memory (known as attributes or varyings in GLSL terms) class AbufNode final { public: - explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, - const Tegra::Shader::IpaMode& input_mode, Node buffer = {}) - : input_mode{input_mode}, buffer{buffer}, index{index}, element{element} {} - + // Initialize for standard attributes (index is explicit). explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, Node buffer = {}) - : input_mode{}, buffer{buffer}, index{index}, element{element} {} + : buffer{buffer}, index{index}, element{element} {} - Tegra::Shader::IpaMode GetInputMode() const { - return input_mode; - } + // Initialize for physical attributes (index is a variable value). + explicit constexpr AbufNode(Node physical_address, Node buffer = {}) + : physical_address{physical_address}, buffer{buffer} {} Tegra::Shader::Attribute::Index GetIndex() const { return index; @@ -489,11 +477,19 @@ public: return buffer; } + bool IsPhysicalBuffer() const { + return physical_address != nullptr; + } + + Node GetPhysicalAddress() const { + return physical_address; + } + private: - const Tegra::Shader::IpaMode input_mode; - const Node buffer; - const Tegra::Shader::Attribute::Index index; - const u32 element; + Node physical_address{}; + Node buffer{}; + Tegra::Shader::Attribute::Index index{}; + u32 element{}; }; /// Constant buffer node, usually mapped to uniform buffers in GLSL @@ -567,11 +563,8 @@ private: class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset) - : program_code{program_code}, main_offset{main_offset} { - - Decode(); - } + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); + ~ShaderIR(); const std::map<u32, NodeBlock>& GetBasicBlocks() const { return basic_blocks; @@ -585,8 +578,7 @@ public: return used_predicates; } - const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>& - GetInputAttributes() const { + const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const { return used_input_attributes; } @@ -615,6 +607,10 @@ public: return static_cast<std::size_t>(coverage_end * sizeof(u64)); } + bool HasPhysicalAttributes() const { + return uses_physical_attributes; + } + const Tegra::Shader::Header& GetHeader() const { return header; } @@ -696,8 +692,9 @@ private: /// Generates a predicate node for an immediate true or false value Node GetPredicate(bool immediate); /// Generates a node representing an input attribute. Keeps track of used attributes. - Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, - const Tegra::Shader::IpaMode& input_mode, Node buffer = {}); + Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); + /// Generates a node representing a physical input attribute. + Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); /// Generates a node representing an output attribute. Keeps track of used attributes. Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); /// Generates a node representing an internal flag @@ -814,11 +811,12 @@ private: void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, Node op_c, Node imm_lut, bool sets_cc); - Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); + Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor); + std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; - std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); + std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, + s64 cursor) const; std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, Node addr_register, @@ -835,12 +833,10 @@ private: return StoreNode(OperationNode(code, std::move(meta), operands...)); } - template <typename... T> Node Operation(OperationCode code, std::vector<Node>&& operands) { return StoreNode(OperationNode(code, std::move(operands))); } - template <typename... T> Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) { return StoreNode(OperationNode(code, std::move(meta), std::move(operands))); } @@ -872,13 +868,13 @@ private: std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; - std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>> - used_input_attributes; + std::set<Tegra::Shader::Attribute::Index> used_input_attributes; std::set<Tegra::Shader::Attribute::Index> used_output_attributes; std::map<u32, ConstBuffer> used_cbufs; std::set<Sampler> used_samplers; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; + bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes Tegra::Shader::Header header; }; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 4505667ff..19ede1eb9 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -17,22 +17,24 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, for (; cursor >= 0; --cursor) { const Node node = code.at(cursor); if (const auto operation = std::get_if<OperationNode>(node)) { - if (operation->GetCode() == operation_code) + if (operation->GetCode() == operation_code) { return {node, cursor}; + } } if (const auto conditional = std::get_if<ConditionalNode>(node)) { const auto& conditional_code = conditional->GetCode(); const auto [found, internal_cursor] = FindOperation( conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); - if (found) + if (found) { return {found, cursor}; + } } } return {}; } } // namespace -Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { +Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(tracked)) { // Cbuf found, but it has to be immediate return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; @@ -65,7 +67,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { return nullptr; } -std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) { +std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register // that it uses as operand const auto [found, found_cursor] = @@ -80,7 +82,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, } std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, - s64 cursor) { + s64 cursor) const { for (; cursor >= 0; --cursor) { const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); if (!found_node) { |
