diff options
Diffstat (limited to 'src/video_core')
46 files changed, 1148 insertions, 874 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 0b9b6edee..2d4caa08d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -100,6 +100,9 @@ add_library(video_core STATIC shader/decode/xmad.cpp shader/decode/other.cpp shader/decode.cpp + shader/node_helper.cpp + shader/node_helper.h + shader/node.h shader/shader_ir.cpp shader/shader_ir.h shader/track.cpp diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index e83f25fa1..ffb3ec3e0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1663,6 +1663,7 @@ private: INST("111000100100----", Id::BRA, Type::Flow, "BRA"), INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), INST("111000110100---", Id::BRK, Type::Flow, "BRK"), + INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), @@ -1686,7 +1687,6 @@ private: INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), - INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 1e2ff46b0..3f0939ec9 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -75,7 +75,7 @@ void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPus void ThreadManager::SubmitList(Tegra::CommandList&& entries) { const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; - const s64 
synchronization_ticks{Core::Timing::usToCycles(9000)}; + const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); } diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 1d1581f49..65a88b06c 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -2,11 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <cstddef> #include <glad/glad.h> #include "common/logging/log.h" +#include "common/scope_exit.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { @@ -24,6 +27,7 @@ Device::Device() { max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); + has_component_indexing_bug = TestComponentIndexingBug(); } Device::Device(std::nullptr_t) { @@ -31,6 +35,7 @@ Device::Device(std::nullptr_t) { max_vertex_attributes = 16; max_varyings = 15; has_variable_aoffi = true; + has_component_indexing_bug = false; } bool Device::TestVariableAoffi() { @@ -52,4 +57,53 @@ void main() { return supported; } +bool Device::TestComponentIndexingBug() { + constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}"; + const GLchar* COMPONENT_TEST = R"(#version 430 core +layout (std430, binding = 0) buffer OutputBuffer { + uint output_value; +}; +layout (std140, binding = 0) uniform InputBuffer { + uvec4 input_value[4096]; +}; +layout (location = 0) uniform uint idx; +void main() { + output_value = input_value[idx >> 2][idx & 3]; +})"; + const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)}; + SCOPE_EXIT({ glDeleteProgram(shader); }); + glUseProgram(shader); + + OGLVertexArray 
vao; + vao.Create(); + glBindVertexArray(vao.handle); + + constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432}; + OGLBuffer ubo; + ubo.Create(); + glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle); + + OGLBuffer ssbo; + ssbo.Create(); + glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT); + + for (GLuint index = 4; index < 8; ++index) { + glInvalidateBufferData(ssbo.handle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle); + + glProgramUniform1ui(shader, 0, index); + glDrawArrays(GL_POINTS, 0, 1); + + GLuint result; + glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); + if (result != values.at(index)) { + LOG_INFO(Render_OpenGL, log_message, true); + return true; + } + } + LOG_INFO(Render_OpenGL, log_message, false); + return false; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index de8490682..8c8c93760 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -30,13 +30,19 @@ public: return has_variable_aoffi; } + bool HasComponentIndexingBug() const { + return has_component_indexing_bug; + } + private: static bool TestVariableAoffi(); + static bool TestComponentIndexingBug(); std::size_t uniform_buffer_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; bool has_variable_aoffi{}; + bool has_component_indexing_bug{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d66252224..ac8a9e6b7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -35,8 +35,8 @@ struct UnspecializedShader { namespace { /// Gets the address for the specified shader stage program -GPUVAddr 
GetShaderAddress(Maxwell::ShaderProgram program) { - const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; +GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { + const auto& gpu{system.GPU().Maxwell3D()}; const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; return gpu.regs.code_address.CodeAddress() + shader_config.offset; } @@ -350,7 +350,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, Core::Frontend::EmuWindow& emu_window, const Device& device) - : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {} + : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device}, + disk_cache{system} {} void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { @@ -546,42 +547,45 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { - if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) { - return last_shaders[static_cast<u32>(program)]; + if (!system.GPU().Maxwell3D().dirty_flags.shaders) { + return last_shaders[static_cast<std::size_t>(program)]; } - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(program)}; + auto& memory_manager{system.GPU().MemoryManager()}; + const GPUVAddr program_addr{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto& host_ptr{memory_manager.GetPointer(program_addr)}; + const auto host_ptr{memory_manager.GetPointer(program_addr)}; Shader shader{TryGet(host_ptr)}; + if (shader) { + return last_shaders[static_cast<std::size_t>(program)] = shader; + } - if (!shader) { - // No shader found - create a 
new one - ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; - ProgramCode program_code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)}; - program_code_b = GetShaderCode(memory_manager, program_addr_b, - memory_manager.GetPointer(program_addr_b)); - } - const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); - const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; - const auto found = precompiled_shaders.find(unique_identifier); - if (found != precompiled_shaders.end()) { - shader = - std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, - precompiled_programs, found->second, host_ptr); - } else { - shader = std::make_shared<CachedShader>( - device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, - std::move(program_code), std::move(program_code_b), host_ptr); - } - Register(shader); + // No shader found - create a new one + ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; + ProgramCode program_code_b; + if (program == Maxwell::ShaderProgram::VertexA) { + const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; + program_code_b = GetShaderCode(memory_manager, program_addr_b, + memory_manager.GetPointer(program_addr_b)); + } + + const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); + const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; + const auto found = precompiled_shaders.find(unique_identifier); + if (found != precompiled_shaders.end()) { + // Create a shader from the cache + shader = std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, + precompiled_programs, found->second, host_ptr); + } else { + // Create a shader from guest memory + shader = std::make_shared<CachedShader>( + device, cpu_addr, 
unique_identifier, program, disk_cache, precompiled_programs, + std::move(program_code), std::move(program_code_b), host_ptr); } + Register(shader); - return last_shaders[static_cast<u32>(program)] = shader; + return last_shaders[static_cast<std::size_t>(program)] = shader; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 64e5a5594..09bd0761d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -137,6 +137,7 @@ private: CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats); + Core::System& system; Core::Frontend::EmuWindow& emu_window; const Device& device; ShaderDiskCacheOpenGL disk_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ab75bb795..739477cc9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -45,7 +45,6 @@ struct TextureAoffi {}; using TextureArgument = std::pair<Type, Node>; using TextureIR = std::variant<TextureAoffi, TextureArgument>; -enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); @@ -124,8 +123,8 @@ bool IsPrecise(Operation operand) { return false; } -bool IsPrecise(Node node) { - if (const auto operation = std::get_if<OperationNode>(node)) { +bool IsPrecise(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { return IsPrecise(*operation); } return false; @@ -247,6 +246,12 @@ private: code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); code.AddNewLine(); + code.AddLine("in gl_PerVertex {{"); + ++code.scope; + code.AddLine("vec4 gl_Position;"); + 
--code.scope; + code.AddLine("}} gl_in[];"); + DeclareVertexRedeclarations(); } @@ -349,7 +354,7 @@ private: } void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 generic_index{GetGenericAttributeIndex(index)}; + const u32 location{GetGenericAttributeIndex(index)}; std::string name{GetInputAttribute(index)}; if (stage == ShaderStage::Geometry) { @@ -358,19 +363,13 @@ private: std::string suffix; if (stage == ShaderStage::Fragment) { - const auto input_mode{header.ps.GetAttributeUse(generic_index)}; + const auto input_mode{header.ps.GetAttributeUse(location)}; if (skip_unused && input_mode == AttributeUse::Unused) { return; } suffix = GetInputFlags(input_mode); } - u32 location = generic_index; - if (stage != ShaderStage::Vertex) { - // If inputs are varyings, add an offset - location += GENERIC_VARYING_START_LOCATION; - } - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name); } @@ -395,7 +394,7 @@ private: } void DeclareOutputAttribute(Attribute::Index index) { - const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION}; + const u32 location{GetGenericAttributeIndex(index)}; code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); } @@ -498,15 +497,15 @@ private: } void VisitBlock(const NodeBlock& bb) { - for (const Node node : bb) { + for (const auto& node : bb) { if (const std::string expr = Visit(node); !expr.empty()) { code.AddLine(expr); } } } - std::string Visit(Node node) { - if (const auto operation = std::get_if<OperationNode>(node)) { + std::string Visit(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { const auto operation_index = static_cast<std::size_t>(operation->GetCode()); if (operation_index >= operation_decompilers.size()) { UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); @@ -520,7 +519,7 @@ private: return (this->*decompiler)(*operation); } - if (const auto gpr = 
std::get_if<GprNode>(node)) { + if (const auto gpr = std::get_if<GprNode>(&*node)) { const u32 index = gpr->GetIndex(); if (index == Register::ZeroIndex) { return "0"; @@ -528,7 +527,7 @@ private: return GetRegister(index); } - if (const auto immediate = std::get_if<ImmediateNode>(node)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { const u32 value = immediate->GetValue(); if (value < 10) { // For eyecandy avoid using hex numbers on single digits @@ -537,7 +536,7 @@ private: return fmt::format("utof(0x{:x}u)", immediate->GetValue()); } - if (const auto predicate = std::get_if<PredicateNode>(node)) { + if (const auto predicate = std::get_if<PredicateNode>(&*node)) { const auto value = [&]() -> std::string { switch (const auto index = predicate->GetIndex(); index) { case Tegra::Shader::Pred::UnusedIndex: @@ -554,7 +553,7 @@ private: return value; } - if (const auto abuf = std::get_if<AbufNode>(node)) { + if (const auto abuf = std::get_if<AbufNode>(&*node)) { UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, "Physical attributes in geometry shaders are not implemented"); if (abuf->IsPhysicalBuffer()) { @@ -564,9 +563,9 @@ private: return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); } - if (const auto cbuf = std::get_if<CbufNode>(node)) { + if (const auto cbuf = std::get_if<CbufNode>(&*node)) { const Node offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if<ImmediateNode>(offset)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { // Direct access const u32 offset_imm = immediate->GetValue(); ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); @@ -577,30 +576,47 @@ private: if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset)); - return fmt::format("{}[{} / 4][{} % 4]", 
GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset); + code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset)); + + if (!device.HasComponentIndexingBug()) { + return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), + final_offset, final_offset); + } + + // AMD's proprietary GLSL compiler emits ill code for variable component access. + // To bypass this driver bug generate 4 ifs, one per each component. + const std::string pack = code.GenerateTemporary(); + code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), + final_offset); + + const std::string result = code.GenerateTemporary(); + code.AddLine("float {};", result); + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, + pack, GetSwizzle(swizzle)); + } + return result; } UNREACHABLE_MSG("Unmanaged offset node type"); } - if (const auto gmem = std::get_if<GmemNode>(node)) { + if (const auto gmem = std::get_if<GmemNode>(&*node)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); } - if (const auto lmem = std::get_if<LmemNode>(node)) { + if (const auto lmem = std::get_if<LmemNode>(&*node)) { return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); } - if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) { + if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { return GetInternalFlag(internal_flag->GetFlag()); } - if (const auto conditional = std::get_if<ConditionalNode>(node)) { + if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { // It's invalid to call conditional on nested nodes, use an operation instead code.AddLine("if ({}) {{", 
Visit(conditional->GetCondition())); ++code.scope; @@ -612,7 +628,7 @@ private: return {}; } - if (const auto comment = std::get_if<CommentNode>(node)) { + if (const auto comment = std::get_if<CommentNode>(&*node)) { return "// " + comment->GetText(); } @@ -620,7 +636,7 @@ private: return {}; } - std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) { + std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { const auto GeometryPass = [&](std::string_view name) { if (stage == ShaderStage::Geometry && buffer) { // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games @@ -633,10 +649,14 @@ private: switch (attribute) { case Attribute::Index::Position: - if (stage != ShaderStage::Fragment) { - return GeometryPass("position") + GetSwizzle(element); - } else { + switch (stage) { + case ShaderStage::Geometry: + return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), + GetSwizzle(element)); + case ShaderStage::Fragment: return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); + default: + UNREACHABLE(); } case Attribute::Index::PointCoord: switch (element) { @@ -852,7 +872,7 @@ private: std::string expr = ", "; switch (type) { case Type::Int: - if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { // Inline the string as an immediate integer in GLSL (some extra arguments are // required to be constant) expr += std::to_string(static_cast<s32>(immediate->GetValue())); @@ -884,7 +904,7 @@ private: for (std::size_t index = 0; index < aoffi.size(); ++index) { const auto operand{aoffi.at(index)}; - if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { // Inline the string as an immediate integer in GLSL (AOFFI arguments are required // to be constant by the standard). 
expr += std::to_string(static_cast<s32>(immediate->GetValue())); @@ -905,23 +925,23 @@ private: } std::string Assign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; std::string target; - if (const auto gpr = std::get_if<GprNode>(dest)) { + if (const auto gpr = std::get_if<GprNode>(&*dest)) { if (gpr->GetIndex() == Register::ZeroIndex) { // Writing to Register::ZeroIndex is a no op return {}; } target = GetRegister(gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(dest)) { + } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); target = [&]() -> std::string { switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { case Attribute::Index::Position: - return "position"s + GetSwizzle(abuf->GetElement()); + return "gl_Position"s + GetSwizzle(abuf->GetElement()); case Attribute::Index::PointSize: return "gl_PointSize"; case Attribute::Index::ClipDistances0123: @@ -937,9 +957,9 @@ private: return "0"; } }(); - } else if (const auto lmem = std::get_if<LmemNode>(dest)) { + } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); - } else if (const auto gmem = std::get_if<GmemNode>(dest)) { + } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); @@ -1216,12 +1236,12 @@ private: } std::string LogicalAssign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; std::string target; - if (const auto pred = std::get_if<PredicateNode>(dest)) { + if (const auto pred = 
std::get_if<PredicateNode>(&*dest)) { ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); const auto index = pred->GetIndex(); @@ -1232,7 +1252,7 @@ private: return {}; } target = GetPredicate(index); - } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) { + } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { target = GetInternalFlag(flag->GetFlag()); } @@ -1409,7 +1429,7 @@ private: } std::string Branch(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); code.AddLine("jmp_to = 0x{:x}u;", target->GetValue()); @@ -1418,7 +1438,7 @@ private: } std::string PushFlowStack(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); @@ -1488,9 +1508,7 @@ private: // If a geometry shader is attached, it will always flip (it's the last stage before // fragment). For more info about flipping, refer to gl_shader_gen.cpp. 
- code.AddLine("position.xy *= viewport_flip.xy;"); - code.AddLine("gl_Position = position;"); - code.AddLine("position.w = 1.0;"); + code.AddLine("gl_Position.xy *= viewport_flip.xy;"); code.AddLine("EmitVertex();"); return {}; } @@ -1728,8 +1746,7 @@ private: } u32 GetNumPhysicalVaryings() const { - return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION, - Maxwell::NumVaryings); + return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); } const Device& device; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 269dda122..9148629ec 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -23,8 +23,6 @@ ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setu out += GetCommonDeclarations(); out += R"( -layout (location = 0) out vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding @@ -47,7 +45,6 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { out += R"( void main() { - position = vec4(0.0, 0.0, 0.0, 0.0); execute_vertex(); )"; @@ -58,19 +55,12 @@ void main() { out += R"( // Set Position Y direction - position.y *= utof(config_pack[2]); + gl_Position.y *= utof(config_pack[2]); // Check if the flip stage is VertexB // Config pack's second value is flip_stage if (config_pack[1] == 1) { // Viewport can be flipped, which is unsupported by glViewport - position.xy *= viewport_flip.xy; - } - gl_Position = position; - - // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 - // For now, this is here to bring order in lieu of proper emulation - if (config_pack[1] == 1) { - position.w = 1.0; + gl_Position.xy *= viewport_flip.xy; } })"; @@ -84,9 +74,6 @@ ProgramResult GenerateGeometryShader(const Device& device, const 
ShaderSetup& se out += GetCommonDeclarations(); out += R"( -layout (location = 0) in vec4 gs_position[]; -layout (location = 0) out vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding @@ -122,8 +109,6 @@ layout (location = 5) out vec4 FragColor5; layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; -layout (location = 0) in noperspective vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 00242ecbe..3b966ddc3 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -18,6 +18,7 @@ constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; +constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}}; } // namespace Alternatives @@ -51,15 +52,19 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { SetupFamilies(dldi, surface); SetupProperties(dldi); + SetupFeatures(dldi); } VKDevice::~VKDevice() = default; bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { - const auto queue_cis = GetDeviceQueueCreateInfos(); - vk::PhysicalDeviceFeatures device_features{}; + vk::PhysicalDeviceFeatures device_features; + device_features.vertexPipelineStoresAndAtomics = true; + device_features.independentBlend = true; + device_features.textureCompressionASTC_LDR = is_optimal_astc_supported; - const 
std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; + const auto queue_cis = GetDeviceQueueCreateInfos(); + const std::vector<const char*> extensions = LoadExtensions(dldi); const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0, nullptr, static_cast<u32>(extensions.size()), extensions.data(), &device_features); @@ -90,7 +95,7 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, LOG_CRITICAL(Render_Vulkan, "Format={} with usage={} and type={} has no defined alternatives and host " "hardware does not support it", - static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), + vk::to_string(wanted_format), vk::to_string(wanted_usage), static_cast<u32>(format_type)); UNREACHABLE(); return wanted_format; @@ -118,6 +123,30 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, return wanted_format; } +bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, + const vk::DispatchLoaderDynamic& dldi) const { + if (!features.textureCompressionASTC_LDR) { + return false; + } + const auto format_feature_usage{ + vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | + vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | + vk::FormatFeatureFlagBits::eTransferDst}; + constexpr std::array<vk::Format, 9> astc_formats = { + vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, + vk::Format::eAstc5x5SrgbBlock, vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock}; + for (const auto format : astc_formats) { + const auto format_properties{physical.getFormatProperties(format, dldi)}; + if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { + return false; + } + } + return true; +} + bool VKDevice::IsFormatSupported(vk::Format wanted_format, 
vk::FormatFeatureFlags wanted_usage, FormatType format_type) const { const auto it = format_properties.find(wanted_format); @@ -132,11 +161,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, vk::SurfaceKHR surface) { - const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME; - bool has_swapchain{}; for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { - has_swapchain |= prop.extensionName == swapchain_extension; + has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME); } if (!has_swapchain) { // The device doesn't support creating swapchains. @@ -160,8 +187,14 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev } // TODO(Rodrigo): Check if the device matches all requeriments. - const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); - if (props.limits.maxUniformBufferRange < 65536) { + const auto properties{physical.getProperties(dldi)}; + const auto limits{properties.limits}; + if (limits.maxUniformBufferRange < 65536) { + return false; + } + + const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)}; + if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) { return false; } @@ -169,6 +202,30 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev return true; } +std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { + std::vector<const char*> extensions; + extensions.reserve(2); + extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + + const auto Test = [&](const vk::ExtensionProperties& extension, + std::optional<std::reference_wrapper<bool>> status, const char* name, + u32 revision) { + if (extension.extensionName != std::string(name)) { + return; + } + extensions.push_back(name); + if (status) { + status->get() 
= true; + } + }; + + for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { + Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1); + } + + return extensions; +} + void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { std::optional<u32> graphics_family_, present_family_; @@ -196,10 +253,16 @@ void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); device_type = props.deviceType; uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); + max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange); +} + +void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { + const auto supported_features{physical.getFeatures(dldi)}; + is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); } std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { - static const float QUEUE_PRIORITY = 1.f; + static const float QUEUE_PRIORITY = 1.0f; std::set<u32> unique_queue_families = {graphics_family, present_family}; std::vector<vk::DeviceQueueCreateInfo> queue_cis; @@ -212,26 +275,43 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { + static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, + vk::Format::eB5G6R5UnormPack16, + vk::Format::eA2B10G10R10UnormPack32, + vk::Format::eR32G32B32A32Sfloat, + vk::Format::eR16G16Unorm, + vk::Format::eR16G16Snorm, + vk::Format::eR8G8B8A8Srgb, + vk::Format::eR8Unorm, + vk::Format::eB10G11R11UfloatPack32, + vk::Format::eR32Sfloat, + vk::Format::eR16Sfloat, + vk::Format::eR16G16B16A16Sfloat, + vk::Format::eD32Sfloat, + vk::Format::eD16Unorm, + 
vk::Format::eD16UnormS8Uint, + vk::Format::eD24UnormS8Uint, + vk::Format::eD32SfloatS8Uint, + vk::Format::eBc1RgbaUnormBlock, + vk::Format::eBc2UnormBlock, + vk::Format::eBc3UnormBlock, + vk::Format::eBc4UnormBlock, + vk::Format::eBc5UnormBlock, + vk::Format::eBc5SnormBlock, + vk::Format::eBc7UnormBlock, + vk::Format::eAstc4x4UnormBlock, + vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc8x8SrgbBlock, + vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc5x4SrgbBlock, + vk::Format::eAstc5x5UnormBlock, + vk::Format::eAstc5x5SrgbBlock, + vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock}; std::map<vk::Format, vk::FormatProperties> format_properties; - - const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) { + for (const auto format : formats) { format_properties.emplace(format, physical.getFormatProperties(format, dldi)); - }; - AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); - AddFormatQuery(vk::Format::eB5G6R5UnormPack16); - AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32); - AddFormatQuery(vk::Format::eR8G8B8A8Srgb); - AddFormatQuery(vk::Format::eR8Unorm); - AddFormatQuery(vk::Format::eD32Sfloat); - AddFormatQuery(vk::Format::eD16Unorm); - AddFormatQuery(vk::Format::eD16UnormS8Uint); - AddFormatQuery(vk::Format::eD24UnormS8Uint); - AddFormatQuery(vk::Format::eD32SfloatS8Uint); - AddFormatQuery(vk::Format::eBc1RgbaUnormBlock); - AddFormatQuery(vk::Format::eBc2UnormBlock); - AddFormatQuery(vk::Format::eBc3UnormBlock); - AddFormatQuery(vk::Format::eBc4UnormBlock); - + } return format_properties; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index e87c7a508..537825d8b 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -11,7 +11,7 @@ namespace Vulkan { -/// Format usage descriptor +/// Format usage descriptor. 
enum class FormatType { Linear, Optimal, Buffer }; /// Handles data specific to a physical device. @@ -34,12 +34,12 @@ public: vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, FormatType format_type) const; - /// Returns the dispatch loader with direct function pointers of the device + /// Returns the dispatch loader with direct function pointers of the device. const vk::DispatchLoaderDynamic& GetDispatchLoader() const { return dld; } - /// Returns the logical device + /// Returns the logical device. vk::Device GetLogical() const { return logical.get(); } @@ -69,30 +69,55 @@ public: return present_family; } - /// Returns if the device is integrated with the host CPU + /// Returns if the device is integrated with the host CPU. bool IsIntegrated() const { return device_type == vk::PhysicalDeviceType::eIntegratedGpu; } - /// Returns uniform buffer alignment requeriment + /// Returns uniform buffer alignment requeriment. u64 GetUniformBufferAlignment() const { return uniform_buffer_alignment; } + /// Returns the maximum range for storage buffers. + u64 GetMaxStorageBufferRange() const { + return max_storage_buffer_range; + } + + /// Returns true if ASTC is natively supported. + bool IsOptimalAstcSupported() const { + return is_optimal_astc_supported; + } + + /// Returns true if the device supports VK_EXT_scalar_block_layout. + bool IsExtScalarBlockLayoutSupported() const { + return ext_scalar_block_layout; + } + /// Checks if the physical device is suitable. static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, vk::SurfaceKHR surface); private: + /// Loads extensions into a vector and stores available ones in this object. + std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi); + /// Sets up queue families. void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); /// Sets up device properties. 
void SetupProperties(const vk::DispatchLoaderDynamic& dldi); + /// Sets up device features. + void SetupFeatures(const vk::DispatchLoaderDynamic& dldi); + /// Returns a list of queue initialization descriptors. std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; + /// Returns true if ASTC textures are natively supported. + bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, + const vk::DispatchLoaderDynamic& dldi) const; + /// Returns true if a format is supported. bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, FormatType format_type) const; @@ -101,16 +126,19 @@ private: static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); - const vk::PhysicalDevice physical; ///< Physical device - vk::DispatchLoaderDynamic dld; ///< Device function pointers - UniqueDevice logical; ///< Logical device - vk::Queue graphics_queue; ///< Main graphics queue - vk::Queue present_queue; ///< Main present queue - u32 graphics_family{}; ///< Main graphics queue family index - u32 present_family{}; ///< Main present queue family index - vk::PhysicalDeviceType device_type; ///< Physical device type - u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment - std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary + const vk::PhysicalDevice physical; ///< Physical device. + vk::DispatchLoaderDynamic dld; ///< Device function pointers. + UniqueDevice logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + vk::PhysicalDeviceType device_type; ///< Physical device type. + u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment. 
+ u64 max_storage_buffer_range{}; ///< Max storage buffer size. + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout. + std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary. }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a5b25aeff..547883425 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -17,6 +17,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/shader/shader_ir.h" @@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; using Operation = const OperationNode&; // TODO(Rodrigo): Use rasterizer's value -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; +constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000; +constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4; constexpr u32 STAGE_BINDING_STRIDE = 0x100; enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; @@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) { class SPIRVDecompiler : public Sirit::Module { public: - explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) - : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { + explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) + : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { AddCapability(spv::Capability::Shader); AddExtension("SPV_KHR_storage_buffer_storage_class"); AddExtension("SPV_KHR_variable_pointers"); @@ -195,7 +197,9 @@ public: entries.samplers.emplace_back(sampler); } for 
(const auto& attribute : ir.GetInputAttributes()) { - entries.attributes.insert(GetGenericAttributeLocation(attribute)); + if (IsGenericAttribute(attribute)) { + entries.attributes.insert(GetGenericAttributeLocation(attribute)); + } } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -210,7 +214,6 @@ private: std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); - static constexpr u32 CBUF_STRIDE = 16; void AllocateBindings() { const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; @@ -315,6 +318,7 @@ private: constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", "overflow"}; for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { + const auto flag_code = static_cast<InternalFlag>(flag); const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); } @@ -374,7 +378,9 @@ private: u32 binding = const_buffers_base_binding; for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; - const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); + const Id type = + device.IsExtScalarBlockLayoutSupported() ? 
t_cbuf_scalar_ubo : t_cbuf_std140_ubo; + const Id id = OpVariable(type, spv::StorageClass::Uniform); AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); Decorate(id, spv::Decoration::Binding, binding++); @@ -475,13 +481,13 @@ private: } void VisitBasicBlock(const NodeBlock& bb) { - for (const Node node : bb) { + for (const auto& node : bb) { static_cast<void>(Visit(node)); } } - Id Visit(Node node) { - if (const auto operation = std::get_if<OperationNode>(node)) { + Id Visit(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { const auto operation_index = static_cast<std::size_t>(operation->GetCode()); const auto decompiler = operation_decompilers[operation_index]; if (decompiler == nullptr) { @@ -489,17 +495,17 @@ private: } return (this->*decompiler)(*operation); - } else if (const auto gpr = std::get_if<GprNode>(node)) { + } else if (const auto gpr = std::get_if<GprNode>(&*node)) { const u32 index = gpr->GetIndex(); if (index == Register::ZeroIndex) { return Constant(t_float, 0.0f); } return Emit(OpLoad(t_float, registers.at(index))); - } else if (const auto immediate = std::get_if<ImmediateNode>(node)) { + } else if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { return BitcastTo<Type::Float>(Constant(t_uint, immediate->GetValue())); - } else if (const auto predicate = std::get_if<PredicateNode>(node)) { + } else if (const auto predicate = std::get_if<PredicateNode>(&*node)) { const auto value = [&]() -> Id { switch (const auto index = predicate->GetIndex(); index) { case Tegra::Shader::Pred::UnusedIndex: @@ -515,7 +521,7 @@ private: } return value; - } else if (const auto abuf = std::get_if<AbufNode>(node)) { + } else if (const auto abuf = std::get_if<AbufNode>(&*node)) { const auto attribute = abuf->GetIndex(); const auto element = abuf->GetElement(); @@ -565,40 +571,42 @@ private: } UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); - } else if (const auto cbuf = 
std::get_if<CbufNode>(node)) { - const Node offset = cbuf->GetOffset(); + } else if (const auto cbuf = std::get_if<CbufNode>(&*node)) { + const Node& offset = cbuf->GetOffset(); const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); - Id buffer_index{}; - Id buffer_element{}; - - if (const auto immediate = std::get_if<ImmediateNode>(offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT(offset_imm % 4 == 0); - buffer_index = Constant(t_uint, offset_imm / 16); - buffer_element = Constant(t_uint, (offset_imm / 4) % 4); - - } else if (std::holds_alternative<OperationNode>(*offset)) { - // Indirect access - // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which - // emits sub-optimal code on GLSL from my testing). - const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); - const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); - const Id final_offset = Emit( - OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); - buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); - buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); - + Id pointer{}; + if (device.IsExtScalarBlockLayoutSupported()) { + const Id buffer_offset = Emit(OpShiftRightLogical( + t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); + pointer = Emit( + OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset)); } else { - UNREACHABLE_MSG("Unmanaged offset node type"); + Id buffer_index{}; + Id buffer_element{}; + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { + // Direct access + const u32 offset_imm = immediate->GetValue(); + ASSERT(offset_imm % 4 == 0); + buffer_index = Constant(t_uint, offset_imm / 16); + buffer_element = Constant(t_uint, (offset_imm / 4) % 4); + } else if (std::holds_alternative<OperationNode>(*offset)) { + // Indirect access + const Id offset_id = 
BitcastTo<Type::Uint>(Visit(offset)); + const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); + const Id final_offset = Emit(OpUMod( + t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); + buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); + buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); + } else { + UNREACHABLE_MSG("Unmanaged offset node type"); + } + pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), + buffer_index, buffer_element)); } - - const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), - buffer_index, buffer_element)); return Emit(OpLoad(t_float, pointer)); - } else if (const auto gmem = std::get_if<GmemNode>(node)) { + } else if (const auto gmem = std::get_if<GmemNode>(&*node)) { const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); const Id real = BitcastTo<Type::Uint>(Visit(gmem->GetRealAddress())); const Id base = BitcastTo<Type::Uint>(Visit(gmem->GetBaseAddress())); @@ -608,11 +616,13 @@ private: return Emit(OpLoad(t_float, Emit(OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0u), offset)))); - } else if (const auto conditional = std::get_if<ConditionalNode>(node)) { + } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { // It's invalid to call conditional on nested nodes, use an operation instead const Id true_label = OpLabel(); const Id skip_label = OpLabel(); - Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); + const Id condition = Visit(conditional->GetCondition()); + Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone)); + Emit(OpBranchConditional(condition, true_label, skip_label)); Emit(true_label); VisitBasicBlock(conditional->GetCode()); @@ -621,7 +631,7 @@ private: Emit(skip_label); return {}; - } else if (const auto comment = std::get_if<CommentNode>(node)) { + } else if (const auto comment = 
std::get_if<CommentNode>(&*node)) { Name(Emit(OpUndef(t_void)), comment->GetText()); return {}; } @@ -689,18 +699,18 @@ private: } Id Assign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; Id target{}; - if (const auto gpr = std::get_if<GprNode>(dest)) { + if (const auto gpr = std::get_if<GprNode>(&*dest)) { if (gpr->GetIndex() == Register::ZeroIndex) { // Writing to Register::ZeroIndex is a no op return {}; } target = registers.at(gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(dest)) { + } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { target = [&]() -> Id { switch (const auto attribute = abuf->GetIndex(); attribute) { case Attribute::Index::Position: @@ -725,7 +735,7 @@ private: } }(); - } else if (const auto lmem = std::get_if<LmemNode>(dest)) { + } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { Id address = BitcastTo<Type::Uint>(Visit(lmem->GetAddress())); address = Emit(OpUDiv(t_uint, address, Constant(t_uint, 4))); target = Emit(OpAccessChain(t_prv_float, local_memory, {address})); @@ -771,11 +781,11 @@ private: } Id LogicalAssign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; Id target{}; - if (const auto pred = std::get_if<PredicateNode>(dest)) { + if (const auto pred = std::get_if<PredicateNode>(&*dest)) { ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); const auto index = pred->GetIndex(); @@ -787,7 +797,7 @@ private: } target = predicates.at(index); - } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) { + } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { target = internal_flags.at(static_cast<u32>(flag->GetFlag())); } @@ -873,7 +883,7 @@ private: } else { u32 component_value = 0; if (meta->component) { - const auto component = 
std::get_if<ImmediateNode>(meta->component); + const auto component = std::get_if<ImmediateNode>(&*meta->component); ASSERT_MSG(component, "Component is not an immediate value"); component_value = component->GetValue(); } @@ -930,7 +940,7 @@ private: } Id Branch(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue()))); @@ -939,7 +949,7 @@ private: } Id PushFlowStack(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); ASSERT(target); const Id current = Emit(OpLoad(t_uint, flow_stack_top)); @@ -968,11 +978,11 @@ private: case ShaderStage::Vertex: { // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. 
- const Id position = AccessElement(t_float4, per_vertex, position_index); - Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); + const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); + Id depth = Emit(OpLoad(t_float, z_pointer)); depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); - Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); + Emit(OpStore(z_pointer, depth)); break; } case ShaderStage::Fragment: { @@ -1311,6 +1321,7 @@ private: &SPIRVDecompiler::WorkGroupId<2>, }; + const VKDevice& device; const ShaderIR& ir; const ShaderStage stage; const Tegra::Shader::Header header; @@ -1349,12 +1360,18 @@ private: const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); - const Id t_cbuf_array = - Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), - spv::Decoration::ArrayStride, CBUF_STRIDE); - const Id t_cbuf_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); + const Id t_cbuf_std140 = Decorate( + Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"), + spv::Decoration::ArrayStride, 16u); + const Id t_cbuf_scalar = Decorate( + Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"), + spv::Decoration::ArrayStride, 4u); + const Id t_cbuf_std140_struct = MemberDecorate( + Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); + const Id t_cbuf_scalar_struct = MemberDecorate( + Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); + const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, 
t_cbuf_std140_struct); + const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); const Id t_gmem_array = @@ -1403,8 +1420,9 @@ private: std::map<u32, Id> labels; }; -DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { - auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); +DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, + Maxwell::ShaderStage stage) { + auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage); decompiler->Decompile(); return {std::move(decompiler), decompiler->GetShaderEntries()}; } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 329d8fa38..f90541cc1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -20,10 +20,13 @@ namespace VideoCommon::Shader { class ShaderIR; } +namespace Vulkan { +class VKDevice; +} + namespace Vulkan::VKShader { using Maxwell = Tegra::Engines::Maxwell3D::Regs; - using SamplerEntry = VideoCommon::Shader::Sampler; constexpr u32 DESCRIPTOR_SET = 0; @@ -75,6 +78,7 @@ struct ShaderEntries { using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; -DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); +DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, + Maxwell::ShaderStage stage); } // namespace Vulkan::VKShader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2da595c0d..a0554c97e 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include 
"video_core/engines/shader_header.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index b4859bc1e..87d8fecaa 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index 3a29c4a46..b06cbe441 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index 5341e460f..7bcf38f23 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp index 3095f2fd4..f1875967c 100644 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp @@ -5,6 +5,7 @@ #include 
"common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 9fd4b273e..c8c1a7f40 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 679ac0d4e..73880db0e 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index 1ae192c6a..e02bcd097 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index 0b12a0d08..8be1119df 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include 
"common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index b5ec9a6f5..4221f0c58 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index a1d04c6e5..29be25ca3 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index cc522f1de..f5013e44a 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index 9d2322a1d..2323052b0 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" 
+#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 755f2ec44..48ca7a4af 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index fba44d714..d59d15bd8 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index a425f9eb7..c3bcf1ae9 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index a4cdaf74d..46e3d5905 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -4,6 +4,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace 
VideoCommon::Shader { diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index a6a1fb632..dd20775d7 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index e6a010a7d..80fc0ccfc 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -10,6 +10,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -169,7 +170,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); - const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); SetTemporal(bb, i, gmem); } @@ -262,7 +263,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); - const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); } @@ -298,9 +299,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB const Node base_address{ 
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; - const auto cbuf = std::get_if<CbufNode>(base_address); + const auto cbuf = std::get_if<CbufNode>(&*base_address); ASSERT(cbuf != nullptr); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); ASSERT(cbuf_offset_imm != nullptr); const auto cbuf_offset = cbuf_offset_imm->GetValue(); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index a6c123573..6fc07f213 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -6,6 +6,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp index 71844c42b..9290d22eb 100644 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp index 387491bd3..febbfeb50 100644 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ b/src/video_core/shader/decode/predicate_set_register.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git 
a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index f8659e48e..e6c9d287e 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 44ae87ece..2ac16eeb0 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 5b033126d..4a356dbd4 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -291,8 +292,8 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, const Node sampler_register = GetRegister(reg); const Node base_sampler = TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); - const auto cbuf = std::get_if<CbufNode>(base_sampler); - const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); + const auto cbuf = std::get_if<CbufNode>(&*base_sampler); + const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); ASSERT(cbuf_offset_imm != 
nullptr); const auto cbuf_offset = cbuf_offset_imm->GetValue(); const auto cbuf_index = cbuf->GetIndex(); @@ -388,8 +389,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { - const bool is_array = array; - const bool is_shadow = depth_compare; + const auto is_array = static_cast<bool>(array); + const auto is_shadow = static_cast<bool>(depth_compare); const bool is_bindless = bindless_reg.has_value(); UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index cb9ab72b1..97fc6f9b1 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 04a776398..93dee77d1 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h new file mode 100644 index 000000000..c002f90f9 --- /dev/null +++ b/src/video_core/shader/node.h @@ -0,0 +1,514 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <cstddef> +#include <memory> +#include <string> +#include <tuple> +#include <utility> +#include <variant> +#include <vector> + +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" + +namespace VideoCommon::Shader { + +enum class OperationCode { + Assign, /// (float& dest, float src) -> void + + Select, /// (MetaArithmetic, bool pred, float a, float b) -> float + + FAdd, /// (MetaArithmetic, float a, float b) -> float + FMul, /// (MetaArithmetic, float a, float b) -> float + FDiv, /// (MetaArithmetic, float a, float b) -> float + FFma, /// (MetaArithmetic, float a, float b, float c) -> float + FNegate, /// (MetaArithmetic, float a) -> float + FAbsolute, /// (MetaArithmetic, float a) -> float + FClamp, /// (MetaArithmetic, float value, float min, float max) -> float + FMin, /// (MetaArithmetic, float a, float b) -> float + FMax, /// (MetaArithmetic, float a, float b) -> float + FCos, /// (MetaArithmetic, float a) -> float + FSin, /// (MetaArithmetic, float a) -> float + FExp2, /// (MetaArithmetic, float a) -> float + FLog2, /// (MetaArithmetic, float a) -> float + FInverseSqrt, /// (MetaArithmetic, float a) -> float + FSqrt, /// (MetaArithmetic, float a) -> float + FRoundEven, /// (MetaArithmetic, float a) -> float + FFloor, /// (MetaArithmetic, float a) -> float + FCeil, /// (MetaArithmetic, float a) -> float + FTrunc, /// (MetaArithmetic, float a) -> float + FCastInteger, /// (MetaArithmetic, int a) -> float + FCastUInteger, /// (MetaArithmetic, uint a) -> float + + IAdd, /// (MetaArithmetic, int a, int b) -> int + IMul, /// (MetaArithmetic, int a, int b) -> int + IDiv, /// (MetaArithmetic, int a, int b) -> int + INegate, /// (MetaArithmetic, int a) -> int + IAbsolute, /// (MetaArithmetic, int a) -> int + IMin, /// (MetaArithmetic, int a, int b) -> int + IMax, /// (MetaArithmetic, int a, int b) -> int + ICastFloat, /// (MetaArithmetic, float a) -> int + ICastUnsigned, /// (MetaArithmetic, 
uint a) -> int + ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int + ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int + IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int + IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int + IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int + IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int + IBitwiseNot, /// (MetaArithmetic, int a) -> int + IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int + IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int bits) -> int + IBitCount, /// (MetaArithmetic, int) -> int + + UAdd, /// (MetaArithmetic, uint a, uint b) -> uint + UMul, /// (MetaArithmetic, uint a, uint b) -> uint + UDiv, /// (MetaArithmetic, uint a, uint b) -> uint + UMin, /// (MetaArithmetic, uint a, uint b) -> uint + UMax, /// (MetaArithmetic, uint a, uint b) -> uint + UCastFloat, /// (MetaArithmetic, float a) -> uint + UCastSigned, /// (MetaArithmetic, int a) -> uint + ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint + ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint + UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint + UBitwiseNot, /// (MetaArithmetic, uint a) -> uint + UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint + UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint + UBitCount, /// (MetaArithmetic, uint) -> uint + + HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 + HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 + HAbsolute, /// (f16vec2 a) -> f16vec2 + HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 + HClamp, /// 
(f16vec2 src, float min, float max) -> f16vec2 + HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 + HMergeF32, /// (f16vec2 src) -> float + HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 + HPack2, /// (float a, float b) -> f16vec2 + + LogicalAssign, /// (bool& dst, bool src) -> void + LogicalAnd, /// (bool a, bool b) -> bool + LogicalOr, /// (bool a, bool b) -> bool + LogicalXor, /// (bool a, bool b) -> bool + LogicalNegate, /// (bool a) -> bool + LogicalPick2, /// (bool2 pair, uint index) -> bool + LogicalAll2, /// (bool2 a) -> bool + LogicalAny2, /// (bool2 a) -> bool + + LogicalFLessThan, /// (float a, float b) -> bool + LogicalFEqual, /// (float a, float b) -> bool + LogicalFLessEqual, /// (float a, float b) -> bool + LogicalFGreaterThan, /// (float a, float b) -> bool + LogicalFNotEqual, /// (float a, float b) -> bool + LogicalFGreaterEqual, /// (float a, float b) -> bool + LogicalFIsNan, /// (float a) -> bool + + LogicalILessThan, /// (int a, int b) -> bool + LogicalIEqual, /// (int a, int b) -> bool + LogicalILessEqual, /// (int a, int b) -> bool + LogicalIGreaterThan, /// (int a, int b) -> bool + LogicalINotEqual, /// (int a, int b) -> bool + LogicalIGreaterEqual, /// (int a, int b) -> bool + + LogicalULessThan, /// (uint a, uint b) -> bool + LogicalUEqual, /// (uint a, uint b) -> bool + LogicalULessEqual, /// (uint a, uint b) -> bool + LogicalUGreaterThan, /// (uint a, uint b) -> bool + LogicalUNotEqual, /// (uint a, uint b) -> bool + LogicalUGreaterEqual, /// (uint a, uint b) -> bool + + Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterEqual, /// 
(MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 + + Texture, /// (MetaTexture, float[N] coords) -> float4 + TextureLod, /// (MetaTexture, float[N] coords) -> float4 + TextureGather, /// (MetaTexture, float[N] coords) -> float4 + TextureQueryDimensions, /// (MetaTexture, float a) -> float4 + TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 + TexelFetch, /// (MetaTexture, int[N], int) -> float4 + + Branch, /// (uint branch_target) -> void + PushFlowStack, /// (uint branch_target) -> void + PopFlowStack, /// () -> void + Exit, /// () -> void + Discard, /// () -> void + + EmitVertex, /// () -> void + EndPrimitive, /// () -> void + + YNegate, /// () -> float + LocalInvocationIdX, /// () -> uint + LocalInvocationIdY, /// () -> uint + LocalInvocationIdZ, /// () -> uint + WorkGroupIdX, /// () -> uint + WorkGroupIdY, /// () -> uint + WorkGroupIdZ, /// () -> uint + + Amount, +}; + +enum class InternalFlag { + Zero = 0, + Sign = 1, + Carry = 2, + Overflow = 3, + Amount = 4, +}; + +class OperationNode; +class ConditionalNode; +class GprNode; +class ImmediateNode; +class InternalFlagNode; +class PredicateNode; +class AbufNode; +class CbufNode; +class LmemNode; +class GmemNode; +class CommentNode; + +using NodeData = + std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, + PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; +using Node = std::shared_ptr<NodeData>; +using Node4 = std::array<Node, 4>; +using NodeBlock = 
std::vector<Node>; + +class Sampler { +public: + /// This constructor is for bound samplers + explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{false} {} + + /// This constructor is for bindless samplers + explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, + Tegra::Shader::TextureType type, bool is_array, bool is_shadow) + : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, + is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} + + /// This constructor is for serialization/deserialization + explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, + bool is_array, bool is_shadow, bool is_bindless) + : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, + is_bindless{is_bindless} {} + + std::size_t GetOffset() const { + return offset; + } + + std::size_t GetIndex() const { + return index; + } + + Tegra::Shader::TextureType GetType() const { + return type; + } + + bool IsArray() const { + return is_array; + } + + bool IsShadow() const { + return is_shadow; + } + + bool IsBindless() const { + return is_bindless; + } + + std::pair<u32, u32> GetBindlessCBuf() const { + return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; + } + + bool operator<(const Sampler& rhs) const { + return std::tie(index, offset, type, is_array, is_shadow, is_bindless) < + std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow, + rhs.is_bindless); + } + +private: + /// Offset in TSC memory from which to read the sampler object, as specified by the sampling + /// instruction. + std::size_t offset{}; + std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. 
+ Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) + bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. + bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. + bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. +}; + +struct GlobalMemoryBase { + u32 cbuf_index{}; + u32 cbuf_offset{}; + + bool operator<(const GlobalMemoryBase& rhs) const { + return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); + } +}; + +/// Parameters describing an arithmetic operation +struct MetaArithmetic { + bool precise{}; ///< Whether the operation can be constrained or not +}; + +/// Parameters describing a texture sampler +struct MetaTexture { + const Sampler& sampler; + Node array; + Node depth_compare; + std::vector<Node> aoffi; + Node bias; + Node lod; + Node component{}; + u32 element{}; +}; + +/// Parameters that modify an operation but are not part of any particular operand +using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; + +/// Holds any kind of operation that can be done in the IR: an operation code, its Meta and operands +class OperationNode final { +public: + explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} + + explicit OperationNode(OperationCode code, Meta meta) + : OperationNode(code, std::move(meta), std::vector<Node>{}) {} + + explicit OperationNode(OperationCode code, std::vector<Node> operands) + : OperationNode(code, Meta{}, std::move(operands)) {} + + explicit OperationNode(OperationCode code, Meta meta, std::vector<Node> operands) + : code{code}, meta{std::move(meta)}, operands{std::move(operands)} {} + + template <typename... Args> + explicit OperationNode(OperationCode code, Meta meta, Args&&... 
operands) + : code{code}, meta{std::move(meta)}, operands{std::forward<Args>(operands)...} {} + + OperationCode GetCode() const { + return code; + } + + const Meta& GetMeta() const { + return meta; + } + + std::size_t GetOperandsCount() const { + return operands.size(); + } + + const Node& operator[](std::size_t operand_index) const { + return operands.at(operand_index); + } + +private: + OperationCode code{}; + Meta meta{}; + std::vector<Node> operands; +}; + +/// Holds a condition node together with the code that is executed when that condition is satisfied +class ConditionalNode final { +public: + explicit ConditionalNode(Node condition, std::vector<Node>&& code) + : condition{std::move(condition)}, code{std::move(code)} {} + + const Node& GetCondition() const { + return condition; + } + + const std::vector<Node>& GetCode() const { + return code; + } + +private: + Node condition; ///< Condition to be satisfied + std::vector<Node> code; ///< Code to execute +}; + +/// A general purpose register +class GprNode final { +public: + explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {} + + u32 GetIndex() const { + return static_cast<u32>(index); + } + +private: + Tegra::Shader::Register index{}; +}; + +/// A 32-bit value that represents an immediate value +class ImmediateNode final { +public: + explicit constexpr ImmediateNode(u32 value) : value{value} {} + + u32 GetValue() const { + return value; + } + +private: + u32 value{}; +}; + +/// One of Maxwell's internal flags +class InternalFlagNode final { +public: + explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {} + + InternalFlag GetFlag() const { + return flag; + } + +private: + InternalFlag flag{}; +}; + +/// A predicate register, it can be negated without additional nodes +class PredicateNode final { +public: + explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated) + : index{index}, negated{negated} {} + + Tegra::Shader::Pred GetIndex() const { + return index; + } + + bool IsNegated() 
const { + return negated; + } + +private: + Tegra::Shader::Pred index{}; + bool negated{}; +}; + +/// Attribute buffer memory (known as attributes or varyings in GLSL terms) +class AbufNode final { +public: + // Initialize for standard attributes (index is explicit). + explicit AbufNode(Tegra::Shader::Attribute::Index index, u32 element, Node buffer = {}) + : buffer{std::move(buffer)}, index{index}, element{element} {} + + // Initialize for physical attributes (index is a variable value). + explicit AbufNode(Node physical_address, Node buffer = {}) + : physical_address{std::move(physical_address)}, buffer{std::move(buffer)} {} + + Tegra::Shader::Attribute::Index GetIndex() const { + return index; + } + + u32 GetElement() const { + return element; + } + + const Node& GetBuffer() const { + return buffer; + } + + bool IsPhysicalBuffer() const { + return static_cast<bool>(physical_address); + } + + const Node& GetPhysicalAddress() const { + return physical_address; + } + +private: + Node physical_address; + Node buffer; + Tegra::Shader::Attribute::Index index{}; + u32 element{}; +}; + +/// Constant buffer node, usually mapped to uniform buffers in GLSL +class CbufNode final { +public: + explicit CbufNode(u32 index, Node offset) : index{index}, offset{std::move(offset)} {} + + u32 GetIndex() const { + return index; + } + + const Node& GetOffset() const { + return offset; + } + +private: + u32 index{}; + Node offset; +}; + +/// Local memory node +class LmemNode final { +public: + explicit LmemNode(Node address) : address{std::move(address)} {} + + const Node& GetAddress() const { + return address; + } + +private: + Node address; +}; + +/// Global memory node +class GmemNode final { +public: + explicit GmemNode(Node real_address, Node base_address, const GlobalMemoryBase& descriptor) + : real_address{std::move(real_address)}, base_address{std::move(base_address)}, + descriptor{descriptor} {} + + const Node& GetRealAddress() const { + return real_address; + } + + const 
Node& GetBaseAddress() const { + return base_address; + } + + const GlobalMemoryBase& GetDescriptor() const { + return descriptor; + } + +private: + Node real_address; + Node base_address; + GlobalMemoryBase descriptor; +}; + +/// Commentary, can be dropped +class CommentNode final { +public: + explicit CommentNode(std::string text) : text{std::move(text)} {} + + const std::string& GetText() const { + return text; + } + +private: + std::string text; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp new file mode 100644 index 000000000..6fccbbba3 --- /dev/null +++ b/src/video_core/shader/node_helper.cpp @@ -0,0 +1,103 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstring> +#include <vector> + +#include "common/common_types.h" +#include "video_core/shader/node_helper.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +Node Conditional(Node condition, std::vector<Node> code) { + return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); +} + +Node Comment(std::string text) { + return MakeNode<CommentNode>(std::move(text)); +} + +Node Immediate(u32 value) { + return MakeNode<ImmediateNode>(value); +} + +Node Immediate(s32 value) { + return Immediate(static_cast<u32>(value)); +} + +Node Immediate(f32 value) { + u32 integral; + std::memcpy(&integral, &value, sizeof(u32)); + return Immediate(integral); +} + +/// Returns operation_code unchanged when is_signed is true; otherwise returns the unsigned +/// counterpart of the signed code. Codes without a counterpart hit UNREACHABLE_MSG and yield {}. +OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { + if (is_signed) { + return operation_code; + } + switch (operation_code) { + case OperationCode::FCastInteger: + return OperationCode::FCastUInteger; + case OperationCode::IAdd: + return OperationCode::UAdd; + case OperationCode::IMul: + return OperationCode::UMul; + case OperationCode::IDiv: + return OperationCode::UDiv; + case OperationCode::IMin: + return 
OperationCode::UMin; + case OperationCode::IMax: + return OperationCode::UMax; + case OperationCode::ICastFloat: + return OperationCode::UCastFloat; + case OperationCode::ICastUnsigned: + return OperationCode::UCastSigned; + case OperationCode::ILogicalShiftLeft: + return OperationCode::ULogicalShiftLeft; + case OperationCode::ILogicalShiftRight: + return OperationCode::ULogicalShiftRight; + case OperationCode::IArithmeticShiftRight: + return OperationCode::UArithmeticShiftRight; + case OperationCode::IBitwiseAnd: + return OperationCode::UBitwiseAnd; + case OperationCode::IBitwiseOr: + return OperationCode::UBitwiseOr; + case OperationCode::IBitwiseXor: + return OperationCode::UBitwiseXor; + case OperationCode::IBitwiseNot: + return OperationCode::UBitwiseNot; + case OperationCode::IBitfieldInsert: + return OperationCode::UBitfieldInsert; + case OperationCode::IBitfieldExtract: + return OperationCode::UBitfieldExtract; + case OperationCode::IBitCount: + return OperationCode::UBitCount; + case OperationCode::LogicalILessThan: + return OperationCode::LogicalULessThan; + case OperationCode::LogicalIEqual: + return OperationCode::LogicalUEqual; + case OperationCode::LogicalILessEqual: + return OperationCode::LogicalULessEqual; + case OperationCode::LogicalIGreaterThan: + return OperationCode::LogicalUGreaterThan; + case OperationCode::LogicalINotEqual: + return OperationCode::LogicalUNotEqual; + case OperationCode::LogicalIGreaterEqual: + return OperationCode::LogicalUGreaterEqual; + case OperationCode::INegate: + UNREACHABLE_MSG("Can't negate an unsigned integer"); + return {}; + case OperationCode::IAbsolute: + UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); + return {}; + default: + UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code)); + return {}; + } +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h new file mode 100644 index 000000000..0c2aa749b --- /dev/null +++ b/src/video_core/shader/node_helper.h @@ -0,0 +1,65
@@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <string> +#include <tuple> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/common_types.h" +#include "video_core/shader/node.h" + +namespace VideoCommon::Shader { + +/// This arithmetic operation cannot be constrained (MetaArithmetic::precise is true) +inline constexpr MetaArithmetic PRECISE = {true}; +/// This arithmetic operation can be optimized away (MetaArithmetic::precise is false) +inline constexpr MetaArithmetic NO_PRECISE = {false}; + +/// Creates a conditional node +Node Conditional(Node condition, std::vector<Node> code); + +/// Creates a commentary node +Node Comment(std::string text); + +/// Creates a u32 immediate +Node Immediate(u32 value); + +/// Creates an s32 immediate +Node Immediate(s32 value); + +/// Creates an f32 immediate +Node Immediate(f32 value); + +/// Converts a signed operation code to its unsigned counterpart unless is_signed is true +OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); + +template <typename T, typename... Args> +Node MakeNode(Args&&... args) { + static_assert(std::is_convertible_v<T, NodeData>); + return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); +} + +template <typename... Args> +Node Operation(OperationCode code, Args&&... args) { + if constexpr (sizeof...(args) == 0) { + return MakeNode<OperationNode>(code); + } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>, + Meta>) { + return MakeNode<OperationNode>(code, std::forward<Args>(args)...); + } else { + return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...); + } +} + +template <typename... Args> +Node SignedOperation(OperationCode code, bool is_signed, Args&&... 
args) { + return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...); +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 8a6ee5cf5..11b545cca 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -28,30 +29,11 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) ShaderIR::~ShaderIR() = default; -Node ShaderIR::StoreNode(NodeData&& node_data) { - auto store = std::make_unique<NodeData>(node_data); - const Node node = store.get(); - stored_nodes.push_back(std::move(store)); - return node; -} - -Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) { - return StoreNode(ConditionalNode(condition, std::move(code))); -} - -Node ShaderIR::Comment(std::string text) { - return StoreNode(CommentNode(std::move(text))); -} - -Node ShaderIR::Immediate(u32 value) { - return StoreNode(ImmediateNode(value)); -} - Node ShaderIR::GetRegister(Register reg) { if (reg != Register::ZeroIndex) { used_registers.insert(static_cast<u32>(reg)); } - return StoreNode(GprNode(reg)); + return MakeNode<GprNode>(reg); } Node ShaderIR::GetImmediate19(Instruction instr) { @@ -69,7 +51,7 @@ Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { const auto [entry, is_new] = used_cbufs.try_emplace(index); entry->second.MarkAsUsed(offset); - return StoreNode(CbufNode(index, Immediate(offset))); + return MakeNode<CbufNode>(index, Immediate(offset)); } Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { @@ -80,7 +62,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { entry->second.MarkAsUsedIndirect(); const Node final_offset = 
Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); - return StoreNode(CbufNode(index, final_offset)); + return MakeNode<CbufNode>(index, final_offset); } Node ShaderIR::GetPredicate(u64 pred_, bool negated) { @@ -89,7 +71,7 @@ Node ShaderIR::GetPredicate(u64 pred_, bool negated) { used_predicates.insert(pred); } - return StoreNode(PredicateNode(pred, negated)); + return MakeNode<PredicateNode>(pred, negated); } Node ShaderIR::GetPredicate(bool immediate) { @@ -98,12 +80,12 @@ Node ShaderIR::GetPredicate(bool immediate) { Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { used_input_attributes.emplace(index); - return StoreNode(AbufNode(index, static_cast<u32>(element), buffer)); + return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); } Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { uses_physical_attributes = true; - return StoreNode(AbufNode(GetRegister(physical_address), buffer)); + return MakeNode<AbufNode>(GetRegister(physical_address), buffer); } Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { @@ -115,11 +97,11 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff } used_output_attributes.insert(index); - return StoreNode(AbufNode(index, static_cast<u32>(element), buffer)); + return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); } Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { - const Node node = StoreNode(InternalFlagNode(flag)); + const Node node = MakeNode<InternalFlagNode>(flag); if (negated) { return Operation(OperationCode::LogicalNegate, node); } @@ -127,7 +109,7 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { } Node ShaderIR::GetLocalMemory(Node address) { - return StoreNode(LmemNode(address)); + return MakeNode<LmemNode>(address); } Node ShaderIR::GetTemporal(u32 id) { @@ -393,68 +375,4 @@ Node 
ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { Immediate(bits)); } -/*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code, - bool is_signed) { - if (is_signed) { - return operation_code; - } - switch (operation_code) { - case OperationCode::FCastInteger: - return OperationCode::FCastUInteger; - case OperationCode::IAdd: - return OperationCode::UAdd; - case OperationCode::IMul: - return OperationCode::UMul; - case OperationCode::IDiv: - return OperationCode::UDiv; - case OperationCode::IMin: - return OperationCode::UMin; - case OperationCode::IMax: - return OperationCode::UMax; - case OperationCode::ICastFloat: - return OperationCode::UCastFloat; - case OperationCode::ICastUnsigned: - return OperationCode::UCastSigned; - case OperationCode::ILogicalShiftLeft: - return OperationCode::ULogicalShiftLeft; - case OperationCode::ILogicalShiftRight: - return OperationCode::ULogicalShiftRight; - case OperationCode::IArithmeticShiftRight: - return OperationCode::UArithmeticShiftRight; - case OperationCode::IBitwiseAnd: - return OperationCode::UBitwiseAnd; - case OperationCode::IBitwiseOr: - return OperationCode::UBitwiseOr; - case OperationCode::IBitwiseXor: - return OperationCode::UBitwiseXor; - case OperationCode::IBitwiseNot: - return OperationCode::UBitwiseNot; - case OperationCode::IBitfieldInsert: - return OperationCode::UBitfieldInsert; - case OperationCode::IBitCount: - return OperationCode::UBitCount; - case OperationCode::LogicalILessThan: - return OperationCode::LogicalULessThan; - case OperationCode::LogicalIEqual: - return OperationCode::LogicalUEqual; - case OperationCode::LogicalILessEqual: - return OperationCode::LogicalULessEqual; - case OperationCode::LogicalIGreaterThan: - return OperationCode::LogicalUGreaterThan; - case OperationCode::LogicalINotEqual: - return OperationCode::LogicalUNotEqual; - case OperationCode::LogicalIGreaterEqual: - return OperationCode::LogicalUGreaterEqual; - case 
OperationCode::INegate: - UNREACHABLE_MSG("Can't negate an unsigned integer"); - return {}; - case OperationCode::IAbsolute: - UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); - return {}; - default: - UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code)); - return {}; - } -} - } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ff7472e30..edcf2288e 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -18,188 +18,14 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/shader/node.h" namespace VideoCommon::Shader { -class OperationNode; -class ConditionalNode; -class GprNode; -class ImmediateNode; -class InternalFlagNode; -class PredicateNode; -class AbufNode; ///< Attribute buffer -class CbufNode; ///< Constant buffer -class LmemNode; ///< Local memory -class GmemNode; ///< Global memory -class CommentNode; - using ProgramCode = std::vector<u64>; -using NodeData = - std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, - PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; -using Node = const NodeData*; -using Node4 = std::array<Node, 4>; -using NodeBlock = std::vector<Node>; - constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; -enum class OperationCode { - Assign, /// (float& dest, float src) -> void - - Select, /// (MetaArithmetic, bool pred, float a, float b) -> float - - FAdd, /// (MetaArithmetic, float a, float b) -> float - FMul, /// (MetaArithmetic, float a, float b) -> float - FDiv, /// (MetaArithmetic, float a, float b) -> float - FFma, /// (MetaArithmetic, float a, float b, float c) -> float - FNegate, /// (MetaArithmetic, float a) -> float - FAbsolute, /// (MetaArithmetic, float a) -> float - FClamp, /// (MetaArithmetic, float value, float min, 
float max) -> float - FMin, /// (MetaArithmetic, float a, float b) -> float - FMax, /// (MetaArithmetic, float a, float b) -> float - FCos, /// (MetaArithmetic, float a) -> float - FSin, /// (MetaArithmetic, float a) -> float - FExp2, /// (MetaArithmetic, float a) -> float - FLog2, /// (MetaArithmetic, float a) -> float - FInverseSqrt, /// (MetaArithmetic, float a) -> float - FSqrt, /// (MetaArithmetic, float a) -> float - FRoundEven, /// (MetaArithmetic, float a) -> float - FFloor, /// (MetaArithmetic, float a) -> float - FCeil, /// (MetaArithmetic, float a) -> float - FTrunc, /// (MetaArithmetic, float a) -> float - FCastInteger, /// (MetaArithmetic, int a) -> float - FCastUInteger, /// (MetaArithmetic, uint a) -> float - - IAdd, /// (MetaArithmetic, int a, int b) -> int - IMul, /// (MetaArithmetic, int a, int b) -> int - IDiv, /// (MetaArithmetic, int a, int b) -> int - INegate, /// (MetaArithmetic, int a) -> int - IAbsolute, /// (MetaArithmetic, int a) -> int - IMin, /// (MetaArithmetic, int a, int b) -> int - IMax, /// (MetaArithmetic, int a, int b) -> int - ICastFloat, /// (MetaArithmetic, float a) -> int - ICastUnsigned, /// (MetaArithmetic, uint a) -> int - ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int - ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int - IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int - IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int - IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int - IBitwiseNot, /// (MetaArithmetic, int a) -> int - IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int - IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int offset) -> int - IBitCount, /// (MetaArithmetic, int) -> int - - UAdd, /// (MetaArithmetic, uint a, uint b) -> uint - UMul, /// (MetaArithmetic, uint a, uint b) -> uint - UDiv, /// (MetaArithmetic, uint a, uint b) -> uint - UMin, /// 
(MetaArithmetic, uint a, uint b) -> uint - UMax, /// (MetaArithmetic, uint a, uint b) -> uint - UCastFloat, /// (MetaArithmetic, float a) -> uint - UCastSigned, /// (MetaArithmetic, int a) -> uint - ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint - ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint - UBitwiseNot, /// (MetaArithmetic, uint a) -> uint - UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint - UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint - UBitCount, /// (MetaArithmetic, uint) -> uint - - HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 - HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 - HAbsolute, /// (f16vec2 a) -> f16vec2 - HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 - HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 - HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 - HMergeF32, /// (f16vec2 src) -> float - HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 - HPack2, /// (float a, float b) -> f16vec2 - - LogicalAssign, /// (bool& dst, bool src) -> void - LogicalAnd, /// (bool a, bool b) -> bool - LogicalOr, /// (bool a, bool b) -> bool - LogicalXor, /// (bool a, bool b) -> bool - LogicalNegate, /// (bool a) -> bool - LogicalPick2, /// (bool2 pair, uint index) -> bool - LogicalAll2, /// (bool2 a) -> bool - LogicalAny2, /// (bool2 a) -> bool - - LogicalFLessThan, /// (float a, float b) -> bool - LogicalFEqual, /// (float a, float b) -> bool - LogicalFLessEqual, /// (float a, float b) -> bool - LogicalFGreaterThan, 
/// (float a, float b) -> bool - LogicalFNotEqual, /// (float a, float b) -> bool - LogicalFGreaterEqual, /// (float a, float b) -> bool - LogicalFIsNan, /// (float a) -> bool - - LogicalILessThan, /// (int a, int b) -> bool - LogicalIEqual, /// (int a, int b) -> bool - LogicalILessEqual, /// (int a, int b) -> bool - LogicalIGreaterThan, /// (int a, int b) -> bool - LogicalINotEqual, /// (int a, int b) -> bool - LogicalIGreaterEqual, /// (int a, int b) -> bool - - LogicalULessThan, /// (uint a, uint b) -> bool - LogicalUEqual, /// (uint a, uint b) -> bool - LogicalULessEqual, /// (uint a, uint b) -> bool - LogicalUGreaterThan, /// (uint a, uint b) -> bool - LogicalUNotEqual, /// (uint a, uint b) -> bool - LogicalUGreaterEqual, /// (uint a, uint b) -> bool - - Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 - - Texture, /// (MetaTexture, float[N] coords) -> float4 - TextureLod, /// (MetaTexture, float[N] coords) -> float4 - TextureGather, /// (MetaTexture, float[N] coords) -> float4 - TextureQueryDimensions, /// (MetaTexture, float a) -> float4 - TextureQueryLod, /// (MetaTexture, 
float[N] coords) -> float4 - TexelFetch, /// (MetaTexture, int[N], int) -> float4 - - Branch, /// (uint branch_target) -> void - PushFlowStack, /// (uint branch_target) -> void - PopFlowStack, /// () -> void - Exit, /// () -> void - Discard, /// () -> void - - EmitVertex, /// () -> void - EndPrimitive, /// () -> void - - YNegate, /// () -> float - LocalInvocationIdX, /// () -> uint - LocalInvocationIdY, /// () -> uint - LocalInvocationIdZ, /// () -> uint - WorkGroupIdX, /// () -> uint - WorkGroupIdY, /// () -> uint - WorkGroupIdZ, /// () -> uint - - Amount, -}; - -enum class InternalFlag { - Zero = 0, - Sign = 1, - Carry = 2, - Overflow = 3, - Amount = 4, -}; - /// Describes the behaviour of code path of a given entry point and a return point. enum class ExitMethod { Undetermined, ///< Internal value. Only occur when analyzing JMP loop. @@ -208,71 +34,6 @@ enum class ExitMethod { AlwaysEnd, ///< All code paths reach a END instruction. }; -class Sampler { -public: - // Use this constructor for bounded Samplers - explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_bindless{false} {} - - // Use this constructor for bindless Samplers - explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::TextureType type, bool is_array, bool is_shadow) - : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, - is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {} - - // Use this only for serialization/deserialization - explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_bindless) - : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_bindless{is_bindless} {} - - std::size_t GetOffset() const { - return offset; - } - - std::size_t 
GetIndex() const { - return index; - } - - Tegra::Shader::TextureType GetType() const { - return type; - } - - bool IsArray() const { - return is_array; - } - - bool IsShadow() const { - return is_shadow; - } - - bool IsBindless() const { - return is_bindless; - } - - std::pair<u32, u32> GetBindlessCBuf() const { - return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; - } - - bool operator<(const Sampler& rhs) const { - return std::tie(index, offset, type, is_array, is_shadow, is_bindless) < - std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow, - rhs.is_bindless); - } - -private: - /// Offset in TSC memory from which to read the sampler object, as specified by the sampling - /// instruction. - std::size_t offset{}; - std::size_t index{}; ///< Value used to index into the generated GLSL sampler array. - Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) - bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. - bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. - bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. 
-}; - class ConstBuffer { public: explicit ConstBuffer(u32 max_offset, bool is_indirect) @@ -305,268 +66,11 @@ private: bool is_indirect{}; }; -struct GlobalMemoryBase { - u32 cbuf_index{}; - u32 cbuf_offset{}; - - bool operator<(const GlobalMemoryBase& rhs) const { - return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); - } -}; - struct GlobalMemoryUsage { bool is_read{}; bool is_written{}; }; -struct MetaArithmetic { - bool precise{}; -}; - -struct MetaTexture { - const Sampler& sampler; - Node array{}; - Node depth_compare{}; - std::vector<Node> aoffi; - Node bias{}; - Node lod{}; - Node component{}; - u32 element{}; -}; - -constexpr MetaArithmetic PRECISE = {true}; -constexpr MetaArithmetic NO_PRECISE = {false}; - -using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; - -/// Holds any kind of operation that can be done in the IR -class OperationNode final { -public: - explicit OperationNode(OperationCode code) : code{code} {} - - explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {} - - template <typename... T> - explicit OperationNode(OperationCode code, const T*... operands) - : OperationNode(code, {}, operands...) {} - - template <typename... T> - explicit OperationNode(OperationCode code, Meta&& meta, const T*... 
operands_) - : code{code}, meta{std::move(meta)}, operands{operands_...} {} - - explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands) - : code{code}, meta{meta}, operands{std::move(operands)} {} - - explicit OperationNode(OperationCode code, std::vector<Node>&& operands) - : code{code}, operands{std::move(operands)} {} - - OperationCode GetCode() const { - return code; - } - - const Meta& GetMeta() const { - return meta; - } - - std::size_t GetOperandsCount() const { - return operands.size(); - } - - Node operator[](std::size_t operand_index) const { - return operands.at(operand_index); - } - -private: - const OperationCode code; - const Meta meta; - std::vector<Node> operands; -}; - -/// Encloses inside any kind of node that returns a boolean conditionally-executed code -class ConditionalNode final { -public: - explicit ConditionalNode(Node condition, std::vector<Node>&& code) - : condition{condition}, code{std::move(code)} {} - - Node GetCondition() const { - return condition; - } - - const std::vector<Node>& GetCode() const { - return code; - } - -private: - const Node condition; ///< Condition to be satisfied - std::vector<Node> code; ///< Code to execute -}; - -/// A general purpose register -class GprNode final { -public: - explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {} - - u32 GetIndex() const { - return static_cast<u32>(index); - } - -private: - const Tegra::Shader::Register index; -}; - -/// A 32-bits value that represents an immediate value -class ImmediateNode final { -public: - explicit constexpr ImmediateNode(u32 value) : value{value} {} - - u32 GetValue() const { - return value; - } - -private: - const u32 value; -}; - -/// One of Maxwell's internal flags -class InternalFlagNode final { -public: - explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {} - - InternalFlag GetFlag() const { - return flag; - } - -private: - const InternalFlag flag; -}; - -/// A predicate register, 
it can be negated without additional nodes -class PredicateNode final { -public: - explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated) - : index{index}, negated{negated} {} - - Tegra::Shader::Pred GetIndex() const { - return index; - } - - bool IsNegated() const { - return negated; - } - -private: - const Tegra::Shader::Pred index; - const bool negated; -}; - -/// Attribute buffer memory (known as attributes or varyings in GLSL terms) -class AbufNode final { -public: - // Initialize for standard attributes (index is explicit). - explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, - Node buffer = {}) - : buffer{buffer}, index{index}, element{element} {} - - // Initialize for physical attributes (index is a variable value). - explicit constexpr AbufNode(Node physical_address, Node buffer = {}) - : physical_address{physical_address}, buffer{buffer} {} - - Tegra::Shader::Attribute::Index GetIndex() const { - return index; - } - - u32 GetElement() const { - return element; - } - - Node GetBuffer() const { - return buffer; - } - - bool IsPhysicalBuffer() const { - return physical_address != nullptr; - } - - Node GetPhysicalAddress() const { - return physical_address; - } - -private: - Node physical_address{}; - Node buffer{}; - Tegra::Shader::Attribute::Index index{}; - u32 element{}; -}; - -/// Constant buffer node, usually mapped to uniform buffers in GLSL -class CbufNode final { -public: - explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {} - - u32 GetIndex() const { - return index; - } - - Node GetOffset() const { - return offset; - } - -private: - const u32 index; - const Node offset; -}; - -/// Local memory node -class LmemNode final { -public: - explicit constexpr LmemNode(Node address) : address{address} {} - - Node GetAddress() const { - return address; - } - -private: - const Node address; -}; - -/// Global memory node -class GmemNode final { -public: - explicit constexpr 
GmemNode(Node real_address, Node base_address, - const GlobalMemoryBase& descriptor) - : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {} - - Node GetRealAddress() const { - return real_address; - } - - Node GetBaseAddress() const { - return base_address; - } - - const GlobalMemoryBase& GetDescriptor() const { - return descriptor; - } - -private: - const Node real_address; - const Node base_address; - const GlobalMemoryBase descriptor; -}; - -/// Commentary, can be dropped -class CommentNode final { -public: - explicit CommentNode(std::string text) : text{std::move(text)} {} - - const std::string& GetText() const { - return text; - } - -private: - std::string text; -}; - class ShaderIR final { public: explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); @@ -663,26 +167,6 @@ private: u32 DecodeXmad(NodeBlock& bb, u32 pc); u32 DecodeOther(NodeBlock& bb, u32 pc); - /// Internalizes node's data and returns a managed pointer to a clone of that node - Node StoreNode(NodeData&& node_data); - - /// Creates a conditional node - Node Conditional(Node condition, std::vector<Node>&& code); - /// Creates a commentary - Node Comment(std::string text); - /// Creates an u32 immediate - Node Immediate(u32 value); - /// Creates a s32 immediate - Node Immediate(s32 value) { - return Immediate(static_cast<u32>(value)); - } - /// Creates a f32 immediate - Node Immediate(f32 value) { - u32 integral; - std::memcpy(&integral, &value, sizeof(u32)); - return Immediate(integral); - } - /// Generates a node for a passed register. Node GetRegister(Tegra::Shader::Register reg); /// Generates a node representing a 19-bit immediate value @@ -827,37 +311,6 @@ private: std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory( NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); - template <typename... T> - Node Operation(OperationCode code, const T*... 
operands) { - return StoreNode(OperationNode(code, operands...)); - } - - template <typename... T> - Node Operation(OperationCode code, Meta&& meta, const T*... operands) { - return StoreNode(OperationNode(code, std::move(meta), operands...)); - } - - Node Operation(OperationCode code, std::vector<Node>&& operands) { - return StoreNode(OperationNode(code, std::move(operands))); - } - - Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) { - return StoreNode(OperationNode(code, std::move(meta), std::move(operands))); - } - - template <typename... T> - Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) { - return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...)); - } - - template <typename... T> - Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) { - return StoreNode( - OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...)); - } - - static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); - const ProgramCode& program_code; const u32 main_offset; @@ -868,8 +321,6 @@ private: std::map<u32, NodeBlock> basic_blocks; NodeBlock global_code; - std::vector<std::unique_ptr<NodeData>> stored_nodes; - std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; std::set<Tegra::Shader::Attribute::Index> used_input_attributes; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 19ede1eb9..fc957d980 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -16,12 +16,12 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, OperationCode operation_code) { for (; cursor >= 0; --cursor) { const Node node = code.at(cursor); - if (const auto operation = std::get_if<OperationNode>(node)) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { if (operation->GetCode() == operation_code) { return 
{node, cursor}; } } - if (const auto conditional = std::get_if<ConditionalNode>(node)) { + if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { const auto& conditional_code = conditional->GetCode(); const auto [found, internal_cursor] = FindOperation( conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); @@ -35,11 +35,11 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, } // namespace Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { - if (const auto cbuf = std::get_if<CbufNode>(tracked)) { + if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { // Cbuf found, but it has to be immediate return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; } - if (const auto gpr = std::get_if<GprNode>(tracked)) { + if (const auto gpr = std::get_if<GprNode>(&*tracked)) { if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { return nullptr; } @@ -51,7 +51,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const } return TrackCbuf(source, code, new_cursor); } - if (const auto operation = std::get_if<OperationNode>(tracked)) { + if (const auto operation = std::get_if<OperationNode>(&*tracked)) { for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { // Cbuf found in operand @@ -60,7 +60,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const } return nullptr; } - if (const auto conditional = std::get_if<ConditionalNode>(tracked)) { + if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { const auto& conditional_code = conditional->GetCode(); return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); } @@ -75,7 +75,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, if (!found) { return {}; } - if (const auto immediate = 
std::get_if<ImmediateNode>(found)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { return immediate->GetValue(); } return {}; @@ -88,11 +88,11 @@ std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeB if (!found_node) { return {}; } - const auto operation = std::get_if<OperationNode>(found_node); + const auto operation = std::get_if<OperationNode>(&*found_node); ASSERT(operation); const auto& target = (*operation)[0]; - if (const auto gpr_target = std::get_if<GprNode>(target)) { + if (const auto gpr_target = std::get_if<GprNode>(&*target)) { if (gpr_target->GetIndex() == tracked->GetIndex()) { return {(*operation)[1], new_cursor}; } |
