diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 14 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 663 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 54 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 28 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 2 |
8 files changed, 463 insertions, 329 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b6d9e0ddb..38497678a 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -21,9 +21,18 @@ T GetInteger(GLenum pname) { Device::Device() { uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); + max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); + max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); } +Device::Device(std::nullptr_t) { + uniform_buffer_alignment = 0; + max_vertex_attributes = 16; + max_varyings = 15; + has_variable_aoffi = true; +} + bool Device::TestVariableAoffi() { const GLchar* AOFFI_TEST = R"(#version 430 core uniform sampler2D tex; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 78ff5ee58..de8490682 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -5,17 +5,27 @@ #pragma once #include <cstddef> +#include "common/common_types.h" namespace OpenGL { class Device { public: - Device(); + explicit Device(); + explicit Device(std::nullptr_t); std::size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } + u32 GetMaxVertexAttributes() const { + return max_vertex_attributes; + } + + u32 GetMaxVaryings() const { + return max_varyings; + } + bool HasVariableAoffi() const { return has_variable_aoffi; } @@ -24,6 +34,8 @@ private: static bool TestVariableAoffi(); std::size_t uniform_buffer_alignment{}; + u32 max_vertex_attributes{}; + u32 max_varyings{}; bool has_variable_aoffi{}; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3cc945235..dbd8049f5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -261,8 +261,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() { // MakeQuadArray always generates u32 indexes params.index_format = GL_UNSIGNED_INT; params.count = (regs.vertex_buffer.count / 4) * 6; - params.index_buffer_offset = - primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count); + params.index_buffer_offset = primitive_assembler.MakeQuadArray( + regs.vertex_buffer.first, regs.vertex_buffer.count); } return params; } @@ -1135,7 +1135,9 @@ void RasterizerOpenGL::SyncTransformFeedback() { void RasterizerOpenGL::SyncPointState() { const auto& regs = system.GPU().Maxwell3D().regs; - state.point.size = regs.point_size; + // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid + // in OpenGL). + state.point.size = std::max(1.0f, regs.point_size); } void RasterizerOpenGL::SyncPolygonOffset() { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 1a62795e1..6d4658c8b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -57,15 +57,14 @@ public: shader_source += text; } - void AddLine(std::string_view text) { - AddExpression(text); - AddNewLine(); - } - - void AddLine(char character) { - DEBUG_ASSERT(scope >= 0); - AppendIndentation(); - shader_source += character; + // Forwards all arguments directly to libfmt. + // Note that all formatting requirements for fmt must be + // obeyed when using this function. (e.g. {{ must be used + // printing the character '{' is desirable. Ditto for }} and '}', + // etc). + template <typename... Args> + void AddLine(std::string_view text, Args&&... args) { + AddExpression(fmt::format(text, std::forward<Args>(args)...)); AddNewLine(); } @@ -75,9 +74,7 @@ public: } std::string GenerateTemporary() { - std::string temporary = "tmp"; - temporary += std::to_string(temporary_index++); - return temporary; + return fmt::format("tmp{}", temporary_index++); } std::string GetResult() { @@ -134,6 +131,19 @@ bool IsPrecise(Node node) { return false; } +constexpr bool IsGenericAttribute(Attribute::Index index) { + return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; +} + +constexpr Attribute::Index ToGenericAttribute(u32 value) { + return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0)); +} + +u32 GetGenericAttributeIndex(Attribute::Index index) { + ASSERT(IsGenericAttribute(index)); + return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); +} + class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, @@ -152,42 +162,43 @@ public: DeclareConstantBuffers(); DeclareGlobalMemory(); DeclareSamplers(); + DeclarePhysicalAttributeReader(); - code.AddLine("void execute_" + suffix + "() {"); + code.AddLine("void execute_{}() {{", suffix); ++code.scope; // VM's program counter const auto first_address = ir.GetBasicBlocks().begin()->first; - code.AddLine("uint jmp_to = " + std::to_string(first_address) + "u;"); + code.AddLine("uint jmp_to = {}u;", first_address); // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems // unlikely that shaders will use 20 nested SSYs and PBKs. constexpr u32 FLOW_STACK_SIZE = 20; - code.AddLine(fmt::format("uint flow_stack[{}];", FLOW_STACK_SIZE)); + code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE); code.AddLine("uint flow_stack_top = 0u;"); - code.AddLine("while (true) {"); + code.AddLine("while (true) {{"); ++code.scope; - code.AddLine("switch (jmp_to) {"); + code.AddLine("switch (jmp_to) {{"); for (const auto& pair : ir.GetBasicBlocks()) { const auto [address, bb] = pair; - code.AddLine(fmt::format("case 0x{:x}u: {{", address)); + code.AddLine("case 0x{:x}u: {{", address); ++code.scope; VisitBlock(bb); --code.scope; - code.AddLine('}'); + code.AddLine("}}"); } code.AddLine("default: return;"); - code.AddLine('}'); + code.AddLine("}}"); for (std::size_t i = 0; i < 2; ++i) { --code.scope; - code.AddLine('}'); + code.AddLine("}}"); } } @@ -227,12 +238,13 @@ private: } void DeclareGeometry() { - if (stage != ShaderStage::Geometry) + if (stage != ShaderStage::Geometry) { return; + } const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_vertices = std::to_string(header.common4.max_output_vertices); - code.AddLine("layout (" + topology + ", max_vertices = " + max_vertices + ") out;"); + const auto max_vertices = header.common4.max_output_vertices.Value(); + code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); code.AddNewLine(); DeclareVertexRedeclarations(); @@ -241,7 +253,7 @@ private: void DeclareVertexRedeclarations() { bool clip_distances_declared = false; - code.AddLine("out gl_PerVertex {"); + code.AddLine("out gl_PerVertex {{"); ++code.scope; code.AddLine("vec4 gl_Position;"); @@ -257,122 +269,143 @@ private: } --code.scope; - code.AddLine("};"); + code.AddLine("}};"); code.AddNewLine(); } void DeclareRegisters() { const auto& registers = ir.GetRegisters(); for (const u32 gpr : registers) { - code.AddLine("float " + GetRegister(gpr) + " = 0;"); + code.AddLine("float {} = 0;", GetRegister(gpr)); } - if (!registers.empty()) + if (!registers.empty()) { code.AddNewLine(); + } } void DeclarePredicates() { const auto& predicates = ir.GetPredicates(); for (const auto pred : predicates) { - code.AddLine("bool " + GetPredicate(pred) + " = false;"); + code.AddLine("bool {} = false;", GetPredicate(pred)); } - if (!predicates.empty()) + if (!predicates.empty()) { code.AddNewLine(); + } } void DeclareLocalMemory() { if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; - code.AddLine("float " + GetLocalMemory() + '[' + std::to_string(element_count) + "];"); + code.AddLine("float {}[{}];", GetLocalMemory(), element_count); code.AddNewLine(); } } void DeclareInternalFlags() { for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { - const InternalFlag flag_code = static_cast<InternalFlag>(flag); - code.AddLine("bool " + GetInternalFlag(flag_code) + " = false;"); + const auto flag_code = static_cast<InternalFlag>(flag); + code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); } code.AddNewLine(); } std::string GetInputFlags(AttributeUse attribute) { - std::string out; - switch (attribute) { - case AttributeUse::Constant: - out += "flat "; - break; - case AttributeUse::ScreenLinear: - out += "noperspective "; - break; case AttributeUse::Perspective: // Default, Smooth - break; + return {}; + case AttributeUse::Constant: + return "flat "; + case AttributeUse::ScreenLinear: + return "noperspective "; default: - LOG_CRITICAL(HW_GPU, "Unused attribute being fetched"); - UNREACHABLE(); + case AttributeUse::Unused: + UNREACHABLE_MSG("Unused attribute being fetched"); + return {}; + UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute)); + return {}; } - return out; } void DeclareInputAttributes() { - const auto& attributes = ir.GetInputAttributes(); - for (const auto element : attributes) { - const Attribute::Index index = element.first; - if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { - // Skip when it's not a generic attribute - continue; + if (ir.HasPhysicalAttributes()) { + const u32 num_inputs{GetNumPhysicalInputAttributes()}; + for (u32 i = 0; i < num_inputs; ++i) { + DeclareInputAttribute(ToGenericAttribute(i), true); } + code.AddNewLine(); + return; + } - // TODO(bunnei): Use proper number of elements for these - u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); - if (stage != ShaderStage::Vertex) { - // If inputs are varyings, add an offset - idx += GENERIC_VARYING_START_LOCATION; + const auto& attributes = ir.GetInputAttributes(); + for (const auto index : attributes) { + if (IsGenericAttribute(index)) { + DeclareInputAttribute(index, false); } + } + if (!attributes.empty()) { + code.AddNewLine(); + } + } - std::string attr = GetInputAttribute(index); - if (stage == ShaderStage::Geometry) { - attr = "gs_" + attr + "[]"; - } - std::string suffix; - if (stage == ShaderStage::Fragment) { - const auto input_mode = - header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION); - suffix = GetInputFlags(input_mode); + void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { + const u32 generic_index{GetGenericAttributeIndex(index)}; + + std::string name{GetInputAttribute(index)}; + if (stage == ShaderStage::Geometry) { + name = "gs_" + name + "[]"; + } + + std::string suffix; + if (stage == ShaderStage::Fragment) { + const auto input_mode{header.ps.GetAttributeUse(generic_index)}; + if (skip_unused && input_mode == AttributeUse::Unused) { + return; } - code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " + - attr + ';'); + suffix = GetInputFlags(input_mode); } - if (!attributes.empty()) - code.AddNewLine(); + + u32 location = generic_index; + if (stage != ShaderStage::Vertex) { + // If inputs are varyings, add an offset + location += GENERIC_VARYING_START_LOCATION; + } + + code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name); } void DeclareOutputAttributes() { + if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { + for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { + DeclareOutputAttribute(ToGenericAttribute(i)); + } + code.AddNewLine(); + return; + } + const auto& attributes = ir.GetOutputAttributes(); for (const auto index : attributes) { - if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { - // Skip when it's not a generic attribute - continue; + if (IsGenericAttribute(index)) { + DeclareOutputAttribute(index); } - // TODO(bunnei): Use proper number of elements for these - const auto idx = static_cast<u32>(index) - - static_cast<u32>(Attribute::Index::Attribute_0) + - GENERIC_VARYING_START_LOCATION; - code.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " + - GetOutputAttribute(index) + ';'); - } - if (!attributes.empty()) + } + if (!attributes.empty()) { code.AddNewLine(); + } + } + + void DeclareOutputAttribute(Attribute::Index index) { + const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION}; + code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); } void DeclareConstantBuffers() { for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; - code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) + - ") uniform " + GetConstBufferBlock(index) + " {"); - code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];"); - code.AddLine("};"); + code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, + GetConstBufferBlock(index)); + code.AddLine(" vec4 {}[MAX_CONSTBUFFER_ELEMENTS];", GetConstBuffer(index)); + code.AddLine("}};"); code.AddNewLine(); } } @@ -384,17 +417,16 @@ private: // Since we don't know how the shader will use the shader, hint the driver to disable as // much optimizations as possible std::string qualifier = "coherent volatile"; - if (usage.is_read && !usage.is_written) + if (usage.is_read && !usage.is_written) { qualifier += " readonly"; - else if (usage.is_written && !usage.is_read) + } else if (usage.is_written && !usage.is_read) { qualifier += " writeonly"; + } - const std::string binding = - fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset); - code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " + - GetGlobalMemoryBlock(base) + " {"); - code.AddLine(" float " + GetGlobalMemory(base) + "[];"); - code.AddLine("};"); + code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", + base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); + code.AddLine(" float {}[];", GetGlobalMemory(base)); + code.AddLine("}};"); code.AddNewLine(); } } @@ -402,7 +434,7 @@ private: void DeclareSamplers() { const auto& samplers = ir.GetSamplers(); for (const auto& sampler : samplers) { - std::string sampler_type = [&]() { + std::string sampler_type = [&sampler] { switch (sampler.GetType()) { case Tegra::Shader::TextureType::Texture1D: return "sampler1D"; @@ -417,16 +449,52 @@ private: return "sampler2D"; } }(); - if (sampler.IsArray()) + if (sampler.IsArray()) { sampler_type += "Array"; - if (sampler.IsShadow()) + } + if (sampler.IsShadow()) { sampler_type += "Shadow"; + } - code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) + - ") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';'); + code.AddLine("layout (binding = SAMPLER_BINDING_{}) uniform {} {};", sampler.GetIndex(), + sampler_type, GetSampler(sampler)); } - if (!samplers.empty()) + if (!samplers.empty()) { code.AddNewLine(); + } + } + + void DeclarePhysicalAttributeReader() { + if (!ir.HasPhysicalAttributes()) { + return; + } + code.AddLine("float readPhysicalAttribute(uint physical_address) {{"); + ++code.scope; + code.AddLine("switch (physical_address) {{"); + + // Just declare generic attributes for now. + const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())}; + for (u32 index = 0; index < num_attributes; ++index) { + const auto attribute{ToGenericAttribute(index)}; + for (u32 element = 0; element < 4; ++element) { + constexpr u32 generic_base{0x80}; + constexpr u32 generic_stride{16}; + constexpr u32 element_stride{4}; + const u32 address{generic_base + index * generic_stride + element * element_stride}; + + const bool declared{stage != ShaderStage::Fragment || + header.ps.GetAttributeUse(index) != AttributeUse::Unused}; + const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; + code.AddLine("case 0x{:x}: return {};", address, value); + } + } + + code.AddLine("default: return 0;"); + + code.AddLine("}}"); + --code.scope; + code.AddLine("}}"); + code.AddNewLine(); } void VisitBlock(const NodeBlock& bb) { @@ -450,23 +518,26 @@ private: return {}; } return (this->*decompiler)(*operation); + } - } else if (const auto gpr = std::get_if<GprNode>(node)) { + if (const auto gpr = std::get_if<GprNode>(node)) { const u32 index = gpr->GetIndex(); if (index == Register::ZeroIndex) { return "0"; } return GetRegister(index); + } - } else if (const auto immediate = std::get_if<ImmediateNode>(node)) { + if (const auto immediate = std::get_if<ImmediateNode>(node)) { const u32 value = immediate->GetValue(); if (value < 10) { // For eyecandy avoid using hex numbers on single digits return fmt::format("utof({}u)", immediate->GetValue()); } return fmt::format("utof(0x{:x}u)", immediate->GetValue()); + } - } else if (const auto predicate = std::get_if<PredicateNode>(node)) { + if (const auto predicate = std::get_if<PredicateNode>(node)) { const auto value = [&]() -> std::string { switch (const auto index = predicate->GetIndex(); index) { case Tegra::Shader::Pred::UnusedIndex: @@ -478,77 +549,22 @@ private: } }(); if (predicate->IsNegated()) { - return "!(" + value + ')'; + return fmt::format("!({})", value); } return value; + } - } else if (const auto abuf = std::get_if<AbufNode>(node)) { - const auto attribute = abuf->GetIndex(); - const auto element = abuf->GetElement(); - - const auto GeometryPass = [&](const std::string& name) { - if (stage == ShaderStage::Geometry && abuf->GetBuffer()) { - // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games - // set an 0x80000000 index for those and the shader fails to build. Find out why - // this happens and what's its intent. - return "gs_" + name + "[ftou(" + Visit(abuf->GetBuffer()) + - ") % MAX_VERTEX_INPUT]"; - } - return name; - }; - - switch (attribute) { - case Attribute::Index::Position: - if (stage != ShaderStage::Fragment) { - return GeometryPass("position") + GetSwizzle(element); - } else { - return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element); - } - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return "gl_PointCoord.x"; - case 1: - return "gl_PointCoord.y"; - case 2: - case 3: - return "0"; - } - UNREACHABLE(); - return "0"; - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - ASSERT(stage == ShaderStage::Vertex); - switch (element) { - case 2: - // Config pack's first value is instance_id. - return "uintBitsToFloat(config_pack[0])"; - case 3: - return "uintBitsToFloat(gl_VertexID)"; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return "0"; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderStage::Fragment); - switch (element) { - case 3: - return "itof(gl_FrontFacing ? -1 : 0)"; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return "0"; - default: - if (attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31) { - return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element); - } - break; + if (const auto abuf = std::get_if<AbufNode>(node)) { + UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, + "Physical attributes in geometry shaders are not implemented"); + if (abuf->IsPhysicalBuffer()) { + return fmt::format("readPhysicalAttribute(ftou({}))", + Visit(abuf->GetPhysicalAddress())); } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); + return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); + } - } else if (const auto cbuf = std::get_if<CbufNode>(node)) { + if (const auto cbuf = std::get_if<CbufNode>(node)) { const Node offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(offset)) { // Direct access @@ -556,48 +572,117 @@ private: ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), offset_imm / (4 * 4), (offset_imm / 4) % 4); + } - } else if (std::holds_alternative<OperationNode>(*offset)) { + if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);"); + code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset)); return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), final_offset, final_offset); - - } else { - UNREACHABLE_MSG("Unmanaged offset node type"); } - } else if (const auto gmem = std::get_if<GmemNode>(node)) { + UNREACHABLE_MSG("Unmanaged offset node type"); + } + + if (const auto gmem = std::get_if<GmemNode>(node)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); - const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; + const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); + } - } else if (const auto lmem = std::get_if<LmemNode>(node)) { + if (const auto lmem = std::get_if<LmemNode>(node)) { return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); + } - } else if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) { + if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) { return GetInternalFlag(internal_flag->GetFlag()); + } - } else if (const auto conditional = std::get_if<ConditionalNode>(node)) { + if (const auto conditional = std::get_if<ConditionalNode>(node)) { // It's invalid to call conditional on nested nodes, use an operation instead - code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {"); + code.AddLine("if ({}) {{", Visit(conditional->GetCondition())); ++code.scope; VisitBlock(conditional->GetCode()); --code.scope; - code.AddLine('}'); + code.AddLine("}}"); return {}; + } - } else if (const auto comment = std::get_if<CommentNode>(node)) { + if (const auto comment = std::get_if<CommentNode>(node)) { return "// " + comment->GetText(); } + UNREACHABLE(); return {}; } + std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) { + const auto GeometryPass = [&](std::string_view name) { + if (stage == ShaderStage::Geometry && buffer) { + // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games + // set an 0x80000000 index for those and the shader fails to build. Find out why + // this happens and what's its intent. + return fmt::format("gs_{}[ftou({}) % MAX_VERTEX_INPUT]", name, Visit(buffer)); + } + return std::string(name); + }; + + switch (attribute) { + case Attribute::Index::Position: + if (stage != ShaderStage::Fragment) { + return GeometryPass("position") + GetSwizzle(element); + } else { + return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element); + } + case Attribute::Index::PointCoord: + switch (element) { + case 0: + return "gl_PointCoord.x"; + case 1: + return "gl_PointCoord.y"; + case 2: + case 3: + return "0"; + } + UNREACHABLE(); + return "0"; + case Attribute::Index::TessCoordInstanceIDVertexID: + // TODO(Subv): Find out what the values are for the first two elements when inside a + // vertex shader, and what's the value of the fourth element when inside a Tess Eval + // shader. + ASSERT(stage == ShaderStage::Vertex); + switch (element) { + case 2: + // Config pack's first value is instance_id. + return "uintBitsToFloat(config_pack[0])"; + case 3: + return "uintBitsToFloat(gl_VertexID)"; + } + UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); + return "0"; + case Attribute::Index::FrontFacing: + // TODO(Subv): Find out what the values are for the other elements. + ASSERT(stage == ShaderStage::Fragment); + switch (element) { + case 3: + return "itof(gl_FrontFacing ? -1 : 0)"; + } + UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); + return "0"; + default: + if (IsGenericAttribute(attribute)) { + return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element); + } + break; + } + UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); + return "0"; + } + std::string ApplyPrecise(Operation operation, const std::string& value) { if (!IsPrecise(operation)) { return value; @@ -606,7 +691,7 @@ private: const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; const std::string temporary = code.GenerateTemporary(); - code.AddLine(precise + "float " + temporary + " = " + value + ';'); + code.AddLine("{}float {} = {};", precise, temporary, value); return temporary; } @@ -620,7 +705,7 @@ private: } const std::string temporary = code.GenerateTemporary(); - code.AddLine("float " + temporary + " = " + Visit(operand) + ';'); + code.AddLine("float {} = {};", temporary, Visit(operand)); return temporary; } @@ -635,31 +720,32 @@ private: case Type::Float: return value; case Type::Int: - return "ftoi(" + value + ')'; + return fmt::format("ftoi({})", value); case Type::Uint: - return "ftou(" + value + ')'; + return fmt::format("ftou({})", value); case Type::HalfFloat: - return "toHalf2(" + value + ')'; + return fmt::format("toHalf2({})", value); } UNREACHABLE(); return value; } - std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) { + std::string BitwiseCastResult(const std::string& value, Type type, + bool needs_parenthesis = false) { switch (type) { case Type::Bool: case Type::Bool2: case Type::Float: if (needs_parenthesis) { - return '(' + value + ')'; + return fmt::format("({})", value); } return value; case Type::Int: - return "itof(" + value + ')'; + return fmt::format("itof({})", value); case Type::Uint: - return "utof(" + value + ')'; + return fmt::format("utof({})", value); case Type::HalfFloat: - return "fromHalf2(" + value + ')'; + return fmt::format("fromHalf2({})", value); } UNREACHABLE(); return value; @@ -667,27 +753,27 @@ private: std::string GenerateUnary(Operation operation, const std::string& func, Type result_type, Type type_a, bool needs_parenthesis = true) { - return ApplyPrecise(operation, - BitwiseCastResult(func + '(' + VisitOperand(operation, 0, type_a) + ')', - result_type, needs_parenthesis)); + const std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0, type_a)); + + return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type, needs_parenthesis)); } std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type, Type type_a, Type type_b) { const std::string op_a = VisitOperand(operation, 0, type_a); const std::string op_b = VisitOperand(operation, 1, type_b); + const std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); - return ApplyPrecise( - operation, BitwiseCastResult('(' + op_a + ' ' + func + ' ' + op_b + ')', result_type)); + return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); } std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type, Type type_a, Type type_b) { const std::string op_a = VisitOperand(operation, 0, type_a); const std::string op_b = VisitOperand(operation, 1, type_b); + const std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); - return ApplyPrecise(operation, - BitwiseCastResult(func + '(' + op_a + ", " + op_b + ')', result_type)); + return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); } std::string GenerateTernary(Operation operation, const std::string& func, Type result_type, @@ -695,10 +781,9 @@ private: const std::string op_a = VisitOperand(operation, 0, type_a); const std::string op_b = VisitOperand(operation, 1, type_b); const std::string op_c = VisitOperand(operation, 2, type_c); + const std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); - return ApplyPrecise( - operation, - BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + op_c + ')', result_type)); + return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); } std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type, @@ -707,10 +792,9 @@ private: const std::string op_b = VisitOperand(operation, 1, type_b); const std::string op_c = VisitOperand(operation, 2, type_c); const std::string op_d = VisitOperand(operation, 3, type_d); + const std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); - return ApplyPrecise(operation, BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + - op_c + ", " + op_d + ')', - result_type)); + return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type)); } std::string GenerateTexture(Operation operation, const std::string& function_suffix, @@ -773,7 +857,7 @@ private: // required to be constant) expr += std::to_string(static_cast<s32>(immediate->GetValue())); } else { - expr += "ftoi(" + Visit(operand) + ')'; + expr += fmt::format("ftoi({})", Visit(operand)); } break; case Type::Float: @@ -806,7 +890,7 @@ private: expr += std::to_string(static_cast<s32>(immediate->GetValue())); } else if (device.HasVariableAoffi()) { // Avoid using variable AOFFI on unsupported devices. - expr += "ftoi(" + Visit(operand) + ')'; + expr += fmt::format("ftoi({})", Visit(operand)); } else { // Insert 0 on devices not supporting variable AOFFI. expr += '0'; @@ -831,8 +915,9 @@ private: return {}; } target = GetRegister(gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(dest)) { + UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); + target = [&]() -> std::string { switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { case Attribute::Index::Position: @@ -840,12 +925,11 @@ private: case Attribute::Index::PointSize: return "gl_PointSize"; case Attribute::Index::ClipDistances0123: - return "gl_ClipDistance[" + std::to_string(abuf->GetElement()) + ']'; + return fmt::format("gl_ClipDistance[{}]", abuf->GetElement()); case Attribute::Index::ClipDistances4567: - return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']'; + return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4); default: - if (attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31) { + if (IsGenericAttribute(attribute)) { return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()); } UNIMPLEMENTED_MSG("Unhandled output attribute: {}", @@ -853,21 +937,18 @@ private: return "0"; } }(); - } else if (const auto lmem = std::get_if<LmemNode>(dest)) { - target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]"; - + target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); } else if (const auto gmem = std::get_if<GmemNode>(dest)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); - const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; + const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); - } else { UNREACHABLE_MSG("Assign called without a proper target"); } - code.AddLine(target + " = " + Visit(src) + ';'); + code.AddLine("{} = {};", target, Visit(src)); return {}; } @@ -920,8 +1001,9 @@ private: const std::string condition = Visit(operation[0]); const std::string true_case = Visit(operation[1]); const std::string false_case = Visit(operation[2]); - return ApplyPrecise(operation, - '(' + condition + " ? " + true_case + " : " + false_case + ')'); + const std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case); + + return ApplyPrecise(operation, op_str); } std::string FCos(Operation operation) { @@ -985,9 +1067,9 @@ private: std::string ILogicalShiftRight(Operation operation) { const std::string op_a = VisitOperand(operation, 0, Type::Uint); const std::string op_b = VisitOperand(operation, 1, Type::Uint); + const std::string op_str = fmt::format("int({} >> {})", op_a, op_b); - return ApplyPrecise(operation, - BitwiseCastResult("int(" + op_a + " >> " + op_b + ')', Type::Int)); + return ApplyPrecise(operation, BitwiseCastResult(op_str, Type::Int)); } std::string IArithmeticShiftRight(Operation operation) { @@ -1043,11 +1125,12 @@ private: } std::string HNegate(Operation operation) { - const auto GetNegate = [&](std::size_t index) -> std::string { + const auto GetNegate = [&](std::size_t index) { return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1"; }; - const std::string value = '(' + VisitOperand(operation, 0, Type::HalfFloat) + " * vec2(" + - GetNegate(1) + ", " + GetNegate(2) + "))"; + const std::string value = + fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0, Type::HalfFloat), + GetNegate(1), GetNegate(2)); return BitwiseCastResult(value, Type::HalfFloat); } @@ -1055,7 +1138,8 @@ private: const std::string value = VisitOperand(operation, 0, Type::HalfFloat); const std::string min = VisitOperand(operation, 1, Type::Float); const std::string max = VisitOperand(operation, 2, Type::Float); - const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))"; + const std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); + return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); } @@ -1066,34 +1150,35 @@ private: case Tegra::Shader::HalfType::H0_H1: return operand; case Tegra::Shader::HalfType::F32: - return "vec2(fromHalf2(" + operand + "))"; + return fmt::format("vec2(fromHalf2({}))", operand); case Tegra::Shader::HalfType::H0_H0: - return "vec2(" + operand + "[0])"; + return fmt::format("vec2({}[0])", operand); case Tegra::Shader::HalfType::H1_H1: - return "vec2(" + operand + "[1])"; + return fmt::format("vec2({}[1])", operand); } UNREACHABLE(); return "0"; }(); - return "fromHalf2(" + value + ')'; + return fmt::format("fromHalf2({})", value); } std::string HMergeF32(Operation operation) { - return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; + return fmt::format("float(toHalf2({})[0])", Visit(operation[0])); } std::string HMergeH0(Operation operation) { - return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" + - Visit(operation[0]) + ")[1]))"; + return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[1]), + Visit(operation[0])); } std::string HMergeH1(Operation operation) { - return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[0], toHalf2(" + - Visit(operation[1]) + ")[1]))"; + return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[0]), + Visit(operation[1])); } std::string HPack2(Operation operation) { - return "utof(packHalf2x16(vec2(" + Visit(operation[0]) + ", " + Visit(operation[1]) + ")))"; + return fmt::format("utof(packHalf2x16(vec2({}, {})))", Visit(operation[0]), + Visit(operation[1])); } template <Type type> @@ -1151,7 +1236,7 @@ private: target = GetInternalFlag(flag->GetFlag()); } - code.AddLine(target + " = " + Visit(src) + ';'); + code.AddLine("{} = {};", target, Visit(src)); return {}; } @@ -1173,7 +1258,7 @@ private: std::string LogicalPick2(Operation operation) { const std::string pair = VisitOperand(operation, 0, Type::Bool2); - return pair + '[' + VisitOperand(operation, 1, Type::Uint) + ']'; + return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); } std::string LogicalAll2(Operation operation) { @@ -1185,15 +1270,15 @@ private: } template <bool with_nan> - std::string GenerateHalfComparison(Operation operation, std::string compare_op) { - std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, - Type::HalfFloat, Type::HalfFloat)}; + std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { + const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, + Type::HalfFloat, Type::HalfFloat)}; if constexpr (!with_nan) { return comparison; } - return "halfFloatNanComparison(" + comparison + ", " + - VisitOperand(operation, 0, Type::HalfFloat) + ", " + - VisitOperand(operation, 1, Type::HalfFloat) + ')'; + return fmt::format("halfFloatNanComparison({}, {}, {})", comparison, + VisitOperand(operation, 0, Type::HalfFloat), + VisitOperand(operation, 1, Type::HalfFloat)); } template <bool with_nan> @@ -1270,12 +1355,12 @@ private: switch (meta->element) { case 0: case 1: - return "itof(int(textureSize(" + sampler + ", " + lod + ')' + - GetSwizzle(meta->element) + "))"; + return fmt::format("itof(int(textureSize({}, {}){}))", sampler, lod, + GetSwizzle(meta->element)); case 2: return "0"; case 3: - return "itof(textureQueryLevels(" + sampler + "))"; + return fmt::format("itof(textureQueryLevels({}))", sampler); } UNREACHABLE(); return "0"; @@ -1286,8 +1371,9 @@ private: ASSERT(meta); if (meta->element < 2) { - return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" + - GetSwizzle(meta->element) + "))"; + return fmt::format("itof(int(({} * vec2(256)){}))", + GenerateTexture(operation, "QueryLod", {}), + GetSwizzle(meta->element)); } return "0"; } @@ -1326,7 +1412,7 @@ private: const auto target = std::get_if<ImmediateNode>(operation[0]); UNIMPLEMENTED_IF(!target); - code.AddLine(fmt::format("jmp_to = 0x{:x}u;", target->GetValue())); + code.AddLine("jmp_to = 0x{:x}u;", target->GetValue()); code.AddLine("break;"); return {}; } @@ -1335,7 +1421,7 @@ private: const auto target = std::get_if<ImmediateNode>(operation[0]); UNIMPLEMENTED_IF(!target); - code.AddLine(fmt::format("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue())); + code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); return {}; } @@ -1361,7 +1447,7 @@ private: UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - code.AddLine("if (alpha_test[0] != 0) {"); + code.AddLine("if (alpha_test[0] != 0) {{"); ++code.scope; // We start on the register containing the alpha value in the first RT. u32 current_reg = 3; @@ -1372,13 +1458,12 @@ private: header.ps.IsColorComponentOutputEnabled(render_target, 1) || header.ps.IsColorComponentOutputEnabled(render_target, 2) || header.ps.IsColorComponentOutputEnabled(render_target, 3)) { - code.AddLine( - fmt::format("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg))); + code.AddLine("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg)); current_reg += 4; } } --code.scope; - code.AddLine('}'); + code.AddLine("}}"); // Write the color outputs using the data in the shader registers, disabled // rendertargets/components are skipped in the register assignment. @@ -1387,8 +1472,8 @@ private: // TODO(Subv): Figure out how dual-source blending is configured in the Switch. for (u32 component = 0; component < 4; ++component) { if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - code.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, - SafeGetRegister(current_reg))); + code.AddLine("FragColor{}[{}] = {};", render_target, component, + SafeGetRegister(current_reg)); ++current_reg; } } @@ -1397,7 +1482,7 @@ private: if (header.ps.omap.depth) { // The depth output is always 2 registers after the last color output, and current_reg // already contains one past the last color register. - code.AddLine("gl_FragDepth = " + SafeGetRegister(current_reg + 1) + ';'); + code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1)); } code.AddLine("return;"); @@ -1407,11 +1492,11 @@ private: std::string Discard(Operation operation) { // Enclose "discard" in a conditional, so that GLSL compilation does not complain // about unexecuted instructions that may follow this. - code.AddLine("if (true) {"); + code.AddLine("if (true) {{"); ++code.scope; code.AddLine("discard;"); --code.scope; - code.AddLine("}"); + code.AddLine("}}"); return {}; } @@ -1591,15 +1676,11 @@ private: } std::string GetInputAttribute(Attribute::Index attribute) const { - const auto index{static_cast<u32>(attribute) - - static_cast<u32>(Attribute::Index::Attribute_0)}; - return GetDeclarationWithSuffix(index, "input_attr"); + return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); } std::string GetOutputAttribute(Attribute::Index attribute) const { - const auto index{static_cast<u32>(attribute) - - static_cast<u32>(Attribute::Index::Attribute_0)}; - return GetDeclarationWithSuffix(index, "output_attr"); + return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); } std::string GetConstBuffer(u32 index) const { @@ -1629,7 +1710,7 @@ private: const auto index = static_cast<u32>(flag); ASSERT(index < static_cast<u32>(InternalFlag::Amount)); - return std::string(InternalFlagNames[index]) + '_' + suffix; + return fmt::format("{}_{}", InternalFlagNames[index], suffix); } std::string GetSampler(const Sampler& sampler) const { @@ -1637,7 +1718,20 @@ private: } std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { - return name + '_' + std::to_string(index) + '_' + suffix; + return fmt::format("{}_{}_{}", name, index, suffix); + } + + u32 GetNumPhysicalInputAttributes() const { + return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); + } + + u32 GetNumPhysicalAttributes() const { + return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); + } + + u32 GetNumPhysicalVaryings() const { + return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION, + Maxwell::NumVaryings); } const Device& device; @@ -1652,24 +1746,25 @@ private: } // Anonymous namespace std::string GetCommonDeclarations() { - const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); - return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + - "#define ftoi floatBitsToInt\n" - "#define ftou floatBitsToUint\n" - "#define itof intBitsToFloat\n" - "#define utof uintBitsToFloat\n\n" - "float fromHalf2(vec2 pair) {\n" - " return utof(packHalf2x16(pair));\n" - "}\n\n" - "vec2 toHalf2(float value) {\n" - " return unpackHalf2x16(ftou(value));\n" - "}\n\n" - "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n" - " bvec2 is_nan1 = isnan(pair1);\n" - " bvec2 is_nan2 = isnan(pair2);\n" - " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " - "is_nan2.y);\n" - "}\n"; + return fmt::format( + "#define MAX_CONSTBUFFER_ELEMENTS {}\n" + "#define ftoi floatBitsToInt\n" + "#define ftou floatBitsToUint\n" + "#define itof intBitsToFloat\n" + "#define utof uintBitsToFloat\n\n" + "float fromHalf2(vec2 pair) {{\n" + " return utof(packHalf2x16(pair));\n" + "}}\n\n" + "vec2 toHalf2(float value) {{\n" + " return unpackHalf2x16(ftou(value));\n" + "}}\n\n" + "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" + " bvec2 is_nan1 = isnan(pair1);\n" + " bvec2 is_nan2 = isnan(pair2);\n" + " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " + "is_nan2.y);\n" + "}}\n", + MAX_CONSTBUFFER_ELEMENTS); } ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 254c0d499..fba9c594a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -104,8 +104,9 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { return true; } -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) - : system{system}, precompiled_cache_virtual_file_offset{0} {} +ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} + +ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> ShaderDiskCacheOpenGL::LoadTransferable() { @@ -243,7 +244,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - const auto entry = LoadDecompiledEntry(); + auto entry = LoadDecompiledEntry(); if (!entry) { return {}; } @@ -287,13 +288,13 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn return {}; } - std::vector<u8> code(code_size); + std::string code(code_size, '\0'); if (!LoadArrayFromPrecompiled(code.data(), code.size())) { return {}; } ShaderDiskCacheDecompiled entry; - entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size); + entry.code = std::move(code); u32 const_buffers_count{}; if (!LoadObjectFromPrecompiled(const_buffers_count)) { @@ -303,12 +304,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn for (u32 i = 0; i < const_buffers_count; ++i) { u32 max_offset{}; u32 index{}; - u8 is_indirect{}; + bool is_indirect{}; if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) || !LoadObjectFromPrecompiled(is_indirect)) { return {}; } - entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index); + entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); } u32 samplers_count{}; @@ -320,18 +321,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn u64 offset{}; u64 index{}; u32 type{}; - u8 is_array{}; - u8 is_shadow{}; - u8 is_bindless{}; + bool is_array{}; + bool is_shadow{}; + bool is_bindless{}; if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) || !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) { return {}; } - entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset), - static_cast<std::size_t>(index), - static_cast<Tegra::Shader::TextureType>(type), - is_array != 0, is_shadow != 0, is_bindless != 0); + entry.entries.samplers.emplace_back( + static_cast<std::size_t>(offset), static_cast<std::size_t>(index), + static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); } u32 global_memory_count{}; @@ -342,21 +342,20 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn for (u32 i = 0; i < global_memory_count; ++i) { u32 cbuf_index{}; u32 cbuf_offset{}; - u8 is_read{}; - u8 is_written{}; + bool is_read{}; + bool is_written{}; if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) || !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { return {}; } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0, - is_written != 0); + entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read, + is_written); } for (auto& clip_distance : entry.entries.clip_distances) { - u8 clip_distance_raw{}; - if (!LoadObjectFromPrecompiled(clip_distance_raw)) + if (!LoadObjectFromPrecompiled(clip_distance)) { return {}; - clip_distance = clip_distance_raw != 0; + } } u64 shader_length{}; @@ -384,7 +383,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: for (const auto& cbuf : entries.const_buffers) { if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) || !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) || - !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) { + !SaveObjectToPrecompiled(cbuf.IsIndirect())) { return false; } } @@ -396,9 +395,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) || !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) || !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) || - !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) { + !SaveObjectToPrecompiled(sampler.IsArray()) || + !SaveObjectToPrecompiled(sampler.IsShadow()) || + !SaveObjectToPrecompiled(sampler.IsBindless())) { return false; } } @@ -409,14 +408,13 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: for (const auto& gmem : entries.global_memory_entries) { if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) || !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) || - !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) { + !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) { return false; } } for (const bool clip_distance : entries.clip_distances) { - if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) { + if (!SaveObjectToPrecompiled(clip_distance)) { return false; } } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 0142b2e3b..2da0a4a23 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -70,14 +70,14 @@ namespace std { template <> struct hash<OpenGL::BaseBindings> { - std::size_t operator()(const OpenGL::BaseBindings& bindings) const { + std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16; } }; template <> struct hash<OpenGL::ShaderDiskCacheUsage> { - std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const { + std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { return static_cast<std::size_t>(usage.unique_identifier) ^ std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16; } @@ -162,6 +162,7 @@ struct ShaderDiskCacheDump { class ShaderDiskCacheOpenGL { public: explicit ShaderDiskCacheOpenGL(Core::System& system); + ~ShaderDiskCacheOpenGL(); /// Loads transferable cache. If file has a old version or on failure, it deletes the file. std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> @@ -259,20 +260,35 @@ private: return SaveArrayToPrecompiled(&object, 1); } + bool SaveObjectToPrecompiled(bool object) { + const auto value = static_cast<u8>(object); + return SaveArrayToPrecompiled(&value, 1); + } + template <typename T> bool LoadObjectFromPrecompiled(T& object) { return LoadArrayFromPrecompiled(&object, 1); } - // Copre system + bool LoadObjectFromPrecompiled(bool& object) { + u8 value; + const bool read_ok = LoadArrayFromPrecompiled(&value, 1); + if (!read_ok) { + return false; + } + + object = value != 0; + return true; + } + + // Core system Core::System& system; // Stored transferable shaders std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; - // Stores whole precompiled cache which will be read from or saved to the precompiled chache - // file + // Stores whole precompiled cache which will be read from/saved to the precompiled cache file FileSys::VectorVfsFile precompiled_cache_virtual_file; // Stores the current offset of the precompiled cache file for IO purposes - std::size_t precompiled_cache_virtual_file_offset; + std::size_t precompiled_cache_virtual_file_offset = 0; // The cache has been loaded at boot bool tried_to_load{}; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 6abf948f8..7ab0b4553 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -33,14 +33,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { }; )"; - ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); ProgramResult program = Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); out += program.first; if (setup.IsDualProgram()) { - ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); + const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); ProgramResult program_b = Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); @@ -76,7 +76,7 @@ void main() { } })"; - return {out, program.second}; + return {std::move(out), std::move(program.second)}; } ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { @@ -97,7 +97,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { }; )"; - ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); ProgramResult program = Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); out += program.first; @@ -107,7 +107,7 @@ void main() { execute_geometry(); };)"; - return {out, program.second}; + return {std::move(out), std::move(program.second)}; } ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { @@ -160,7 +160,7 @@ bool AlphaFunc(in float value) { } )"; - ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); ProgramResult program = Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); @@ -172,7 +172,7 @@ void main() { } )"; - return {out, program.second}; + return {std::move(out), std::move(program.second)}; } } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 95b773135..ed7b5cff0 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -126,6 +126,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { return GL_TRIANGLES; case Maxwell::PrimitiveTopology::TriangleStrip: return GL_TRIANGLE_STRIP; + case Maxwell::PrimitiveTopology::TriangleFan: + return GL_TRIANGLE_FAN; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology)); UNREACHABLE(); |
