aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_device.h14
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp663
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp54
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h28
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp14
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h2
8 files changed, 463 insertions, 329 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index b6d9e0ddb..38497678a 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -21,9 +21,18 @@ T GetInteger(GLenum pname) {
Device::Device() {
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
+ max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
+ max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
has_variable_aoffi = TestVariableAoffi();
}
+Device::Device(std::nullptr_t) {
+ uniform_buffer_alignment = 0;
+ max_vertex_attributes = 16;
+ max_varyings = 15;
+ has_variable_aoffi = true;
+}
+
bool Device::TestVariableAoffi() {
const GLchar* AOFFI_TEST = R"(#version 430 core
uniform sampler2D tex;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 78ff5ee58..de8490682 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -5,17 +5,27 @@
#pragma once
#include <cstddef>
+#include "common/common_types.h"
namespace OpenGL {
class Device {
public:
- Device();
+ explicit Device();
+ explicit Device(std::nullptr_t);
std::size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
+ u32 GetMaxVertexAttributes() const {
+ return max_vertex_attributes;
+ }
+
+ u32 GetMaxVaryings() const {
+ return max_varyings;
+ }
+
bool HasVariableAoffi() const {
return has_variable_aoffi;
}
@@ -24,6 +34,8 @@ private:
static bool TestVariableAoffi();
std::size_t uniform_buffer_alignment{};
+ u32 max_vertex_attributes{};
+ u32 max_varyings{};
bool has_variable_aoffi{};
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 3cc945235..dbd8049f5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -261,8 +261,8 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
// MakeQuadArray always generates u32 indexes
params.index_format = GL_UNSIGNED_INT;
params.count = (regs.vertex_buffer.count / 4) * 6;
- params.index_buffer_offset =
- primitive_assembler.MakeQuadArray(regs.vertex_buffer.first, params.count);
+ params.index_buffer_offset = primitive_assembler.MakeQuadArray(
+ regs.vertex_buffer.first, regs.vertex_buffer.count);
}
return params;
}
@@ -1135,7 +1135,9 @@ void RasterizerOpenGL::SyncTransformFeedback() {
void RasterizerOpenGL::SyncPointState() {
const auto& regs = system.GPU().Maxwell3D().regs;
- state.point.size = regs.point_size;
+ // Clamp the point size to a minimum of 1 since nouveau sometimes sets a point size of 0 (and
+ // that's invalid in OpenGL).
+ state.point.size = std::max(1.0f, regs.point_size);
}
void RasterizerOpenGL::SyncPolygonOffset() {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 1a62795e1..6d4658c8b 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -57,15 +57,14 @@ public:
shader_source += text;
}
- void AddLine(std::string_view text) {
- AddExpression(text);
- AddNewLine();
- }
-
- void AddLine(char character) {
- DEBUG_ASSERT(scope >= 0);
- AppendIndentation();
- shader_source += character;
+ // Forwards all arguments directly to libfmt.
+ // Note that all formatting requirements for fmt must be
+ // obeyed when using this function. (e.g. {{ must be used when
+ // printing the character '{' is desired. Ditto for }} and '}',
+ // etc).
+ template <typename... Args>
+ void AddLine(std::string_view text, Args&&... args) {
+ AddExpression(fmt::format(text, std::forward<Args>(args)...));
AddNewLine();
}
@@ -75,9 +74,7 @@ public:
}
std::string GenerateTemporary() {
- std::string temporary = "tmp";
- temporary += std::to_string(temporary_index++);
- return temporary;
+ return fmt::format("tmp{}", temporary_index++);
}
std::string GetResult() {
@@ -134,6 +131,19 @@ bool IsPrecise(Node node) {
return false;
}
+constexpr bool IsGenericAttribute(Attribute::Index index) {
+ return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
+}
+
+constexpr Attribute::Index ToGenericAttribute(u32 value) {
+ return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0));
+}
+
+u32 GetGenericAttributeIndex(Attribute::Index index) {
+ ASSERT(IsGenericAttribute(index));
+ return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
+}
+
class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage,
@@ -152,42 +162,43 @@ public:
DeclareConstantBuffers();
DeclareGlobalMemory();
DeclareSamplers();
+ DeclarePhysicalAttributeReader();
- code.AddLine("void execute_" + suffix + "() {");
+ code.AddLine("void execute_{}() {{", suffix);
++code.scope;
// VM's program counter
const auto first_address = ir.GetBasicBlocks().begin()->first;
- code.AddLine("uint jmp_to = " + std::to_string(first_address) + "u;");
+ code.AddLine("uint jmp_to = {}u;", first_address);
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
constexpr u32 FLOW_STACK_SIZE = 20;
- code.AddLine(fmt::format("uint flow_stack[{}];", FLOW_STACK_SIZE));
+ code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE);
code.AddLine("uint flow_stack_top = 0u;");
- code.AddLine("while (true) {");
+ code.AddLine("while (true) {{");
++code.scope;
- code.AddLine("switch (jmp_to) {");
+ code.AddLine("switch (jmp_to) {{");
for (const auto& pair : ir.GetBasicBlocks()) {
const auto [address, bb] = pair;
- code.AddLine(fmt::format("case 0x{:x}u: {{", address));
+ code.AddLine("case 0x{:x}u: {{", address);
++code.scope;
VisitBlock(bb);
--code.scope;
- code.AddLine('}');
+ code.AddLine("}}");
}
code.AddLine("default: return;");
- code.AddLine('}');
+ code.AddLine("}}");
for (std::size_t i = 0; i < 2; ++i) {
--code.scope;
- code.AddLine('}');
+ code.AddLine("}}");
}
}
@@ -227,12 +238,13 @@ private:
}
void DeclareGeometry() {
- if (stage != ShaderStage::Geometry)
+ if (stage != ShaderStage::Geometry) {
return;
+ }
const auto topology = GetTopologyName(header.common3.output_topology);
- const auto max_vertices = std::to_string(header.common4.max_output_vertices);
- code.AddLine("layout (" + topology + ", max_vertices = " + max_vertices + ") out;");
+ const auto max_vertices = header.common4.max_output_vertices.Value();
+ code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
code.AddNewLine();
DeclareVertexRedeclarations();
@@ -241,7 +253,7 @@ private:
void DeclareVertexRedeclarations() {
bool clip_distances_declared = false;
- code.AddLine("out gl_PerVertex {");
+ code.AddLine("out gl_PerVertex {{");
++code.scope;
code.AddLine("vec4 gl_Position;");
@@ -257,122 +269,143 @@ private:
}
--code.scope;
- code.AddLine("};");
+ code.AddLine("}};");
code.AddNewLine();
}
void DeclareRegisters() {
const auto& registers = ir.GetRegisters();
for (const u32 gpr : registers) {
- code.AddLine("float " + GetRegister(gpr) + " = 0;");
+ code.AddLine("float {} = 0;", GetRegister(gpr));
}
- if (!registers.empty())
+ if (!registers.empty()) {
code.AddNewLine();
+ }
}
void DeclarePredicates() {
const auto& predicates = ir.GetPredicates();
for (const auto pred : predicates) {
- code.AddLine("bool " + GetPredicate(pred) + " = false;");
+ code.AddLine("bool {} = false;", GetPredicate(pred));
}
- if (!predicates.empty())
+ if (!predicates.empty()) {
code.AddNewLine();
+ }
}
void DeclareLocalMemory() {
if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) {
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
- code.AddLine("float " + GetLocalMemory() + '[' + std::to_string(element_count) + "];");
+ code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
}
void DeclareInternalFlags() {
for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
- const InternalFlag flag_code = static_cast<InternalFlag>(flag);
- code.AddLine("bool " + GetInternalFlag(flag_code) + " = false;");
+ const auto flag_code = static_cast<InternalFlag>(flag);
+ code.AddLine("bool {} = false;", GetInternalFlag(flag_code));
}
code.AddNewLine();
}
std::string GetInputFlags(AttributeUse attribute) {
- std::string out;
-
switch (attribute) {
- case AttributeUse::Constant:
- out += "flat ";
- break;
- case AttributeUse::ScreenLinear:
- out += "noperspective ";
- break;
case AttributeUse::Perspective:
// Default, Smooth
- break;
+ return {};
+ case AttributeUse::Constant:
+ return "flat ";
+ case AttributeUse::ScreenLinear:
+ return "noperspective ";
default:
- LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
- UNREACHABLE();
+ case AttributeUse::Unused:
+ UNREACHABLE_MSG("Unused attribute being fetched");
+ return {};
+ UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
+ return {};
}
- return out;
}
void DeclareInputAttributes() {
- const auto& attributes = ir.GetInputAttributes();
- for (const auto element : attributes) {
- const Attribute::Index index = element.first;
- if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
- // Skip when it's not a generic attribute
- continue;
+ if (ir.HasPhysicalAttributes()) {
+ const u32 num_inputs{GetNumPhysicalInputAttributes()};
+ for (u32 i = 0; i < num_inputs; ++i) {
+ DeclareInputAttribute(ToGenericAttribute(i), true);
}
+ code.AddNewLine();
+ return;
+ }
- // TODO(bunnei): Use proper number of elements for these
- u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
- if (stage != ShaderStage::Vertex) {
- // If inputs are varyings, add an offset
- idx += GENERIC_VARYING_START_LOCATION;
+ const auto& attributes = ir.GetInputAttributes();
+ for (const auto index : attributes) {
+ if (IsGenericAttribute(index)) {
+ DeclareInputAttribute(index, false);
}
+ }
+ if (!attributes.empty()) {
+ code.AddNewLine();
+ }
+ }
- std::string attr = GetInputAttribute(index);
- if (stage == ShaderStage::Geometry) {
- attr = "gs_" + attr + "[]";
- }
- std::string suffix;
- if (stage == ShaderStage::Fragment) {
- const auto input_mode =
- header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
- suffix = GetInputFlags(input_mode);
+ void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
+ const u32 generic_index{GetGenericAttributeIndex(index)};
+
+ std::string name{GetInputAttribute(index)};
+ if (stage == ShaderStage::Geometry) {
+ name = "gs_" + name + "[]";
+ }
+
+ std::string suffix;
+ if (stage == ShaderStage::Fragment) {
+ const auto input_mode{header.ps.GetAttributeUse(generic_index)};
+ if (skip_unused && input_mode == AttributeUse::Unused) {
+ return;
}
- code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
- attr + ';');
+ suffix = GetInputFlags(input_mode);
}
- if (!attributes.empty())
- code.AddNewLine();
+
+ u32 location = generic_index;
+ if (stage != ShaderStage::Vertex) {
+ // If inputs are varyings, add an offset
+ location += GENERIC_VARYING_START_LOCATION;
+ }
+
+ code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name);
}
void DeclareOutputAttributes() {
+ if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) {
+ for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
+ DeclareOutputAttribute(ToGenericAttribute(i));
+ }
+ code.AddNewLine();
+ return;
+ }
+
const auto& attributes = ir.GetOutputAttributes();
for (const auto index : attributes) {
- if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
- // Skip when it's not a generic attribute
- continue;
+ if (IsGenericAttribute(index)) {
+ DeclareOutputAttribute(index);
}
- // TODO(bunnei): Use proper number of elements for these
- const auto idx = static_cast<u32>(index) -
- static_cast<u32>(Attribute::Index::Attribute_0) +
- GENERIC_VARYING_START_LOCATION;
- code.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " +
- GetOutputAttribute(index) + ';');
- }
- if (!attributes.empty())
+ }
+ if (!attributes.empty()) {
code.AddNewLine();
+ }
+ }
+
+ void DeclareOutputAttribute(Attribute::Index index) {
+ const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION};
+ code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
}
void DeclareConstantBuffers() {
for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry;
- code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) +
- ") uniform " + GetConstBufferBlock(index) + " {");
- code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];");
- code.AddLine("};");
+ code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index,
+ GetConstBufferBlock(index));
+ code.AddLine(" vec4 {}[MAX_CONSTBUFFER_ELEMENTS];", GetConstBuffer(index));
+ code.AddLine("}};");
code.AddNewLine();
}
}
@@ -384,17 +417,16 @@ private:
// Since we don't know how the shader will use the shader, hint the driver to disable as
// much optimizations as possible
std::string qualifier = "coherent volatile";
- if (usage.is_read && !usage.is_written)
+ if (usage.is_read && !usage.is_written) {
qualifier += " readonly";
- else if (usage.is_written && !usage.is_read)
+ } else if (usage.is_written && !usage.is_read) {
qualifier += " writeonly";
+ }
- const std::string binding =
- fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset);
- code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " +
- GetGlobalMemoryBlock(base) + " {");
- code.AddLine(" float " + GetGlobalMemory(base) + "[];");
- code.AddLine("};");
+ code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{",
+ base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base));
+ code.AddLine(" float {}[];", GetGlobalMemory(base));
+ code.AddLine("}};");
code.AddNewLine();
}
}
@@ -402,7 +434,7 @@ private:
void DeclareSamplers() {
const auto& samplers = ir.GetSamplers();
for (const auto& sampler : samplers) {
- std::string sampler_type = [&]() {
+ std::string sampler_type = [&sampler] {
switch (sampler.GetType()) {
case Tegra::Shader::TextureType::Texture1D:
return "sampler1D";
@@ -417,16 +449,52 @@ private:
return "sampler2D";
}
}();
- if (sampler.IsArray())
+ if (sampler.IsArray()) {
sampler_type += "Array";
- if (sampler.IsShadow())
+ }
+ if (sampler.IsShadow()) {
sampler_type += "Shadow";
+ }
- code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) +
- ") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';');
+ code.AddLine("layout (binding = SAMPLER_BINDING_{}) uniform {} {};", sampler.GetIndex(),
+ sampler_type, GetSampler(sampler));
}
- if (!samplers.empty())
+ if (!samplers.empty()) {
code.AddNewLine();
+ }
+ }
+
+ void DeclarePhysicalAttributeReader() {
+ if (!ir.HasPhysicalAttributes()) {
+ return;
+ }
+ code.AddLine("float readPhysicalAttribute(uint physical_address) {{");
+ ++code.scope;
+ code.AddLine("switch (physical_address) {{");
+
+ // Just declare generic attributes for now.
+ const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
+ for (u32 index = 0; index < num_attributes; ++index) {
+ const auto attribute{ToGenericAttribute(index)};
+ for (u32 element = 0; element < 4; ++element) {
+ constexpr u32 generic_base{0x80};
+ constexpr u32 generic_stride{16};
+ constexpr u32 element_stride{4};
+ const u32 address{generic_base + index * generic_stride + element * element_stride};
+
+ const bool declared{stage != ShaderStage::Fragment ||
+ header.ps.GetAttributeUse(index) != AttributeUse::Unused};
+ const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
+ code.AddLine("case 0x{:x}: return {};", address, value);
+ }
+ }
+
+ code.AddLine("default: return 0;");
+
+ code.AddLine("}}");
+ --code.scope;
+ code.AddLine("}}");
+ code.AddNewLine();
}
void VisitBlock(const NodeBlock& bb) {
@@ -450,23 +518,26 @@ private:
return {};
}
return (this->*decompiler)(*operation);
+ }
- } else if (const auto gpr = std::get_if<GprNode>(node)) {
+ if (const auto gpr = std::get_if<GprNode>(node)) {
const u32 index = gpr->GetIndex();
if (index == Register::ZeroIndex) {
return "0";
}
return GetRegister(index);
+ }
- } else if (const auto immediate = std::get_if<ImmediateNode>(node)) {
+ if (const auto immediate = std::get_if<ImmediateNode>(node)) {
const u32 value = immediate->GetValue();
if (value < 10) {
// For eyecandy avoid using hex numbers on single digits
return fmt::format("utof({}u)", immediate->GetValue());
}
return fmt::format("utof(0x{:x}u)", immediate->GetValue());
+ }
- } else if (const auto predicate = std::get_if<PredicateNode>(node)) {
+ if (const auto predicate = std::get_if<PredicateNode>(node)) {
const auto value = [&]() -> std::string {
switch (const auto index = predicate->GetIndex(); index) {
case Tegra::Shader::Pred::UnusedIndex:
@@ -478,77 +549,22 @@ private:
}
}();
if (predicate->IsNegated()) {
- return "!(" + value + ')';
+ return fmt::format("!({})", value);
}
return value;
+ }
- } else if (const auto abuf = std::get_if<AbufNode>(node)) {
- const auto attribute = abuf->GetIndex();
- const auto element = abuf->GetElement();
-
- const auto GeometryPass = [&](const std::string& name) {
- if (stage == ShaderStage::Geometry && abuf->GetBuffer()) {
- // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
- // set an 0x80000000 index for those and the shader fails to build. Find out why
- // this happens and what's its intent.
- return "gs_" + name + "[ftou(" + Visit(abuf->GetBuffer()) +
- ") % MAX_VERTEX_INPUT]";
- }
- return name;
- };
-
- switch (attribute) {
- case Attribute::Index::Position:
- if (stage != ShaderStage::Fragment) {
- return GeometryPass("position") + GetSwizzle(element);
- } else {
- return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element);
- }
- case Attribute::Index::PointCoord:
- switch (element) {
- case 0:
- return "gl_PointCoord.x";
- case 1:
- return "gl_PointCoord.y";
- case 2:
- case 3:
- return "0";
- }
- UNREACHABLE();
- return "0";
- case Attribute::Index::TessCoordInstanceIDVertexID:
- // TODO(Subv): Find out what the values are for the first two elements when inside a
- // vertex shader, and what's the value of the fourth element when inside a Tess Eval
- // shader.
- ASSERT(stage == ShaderStage::Vertex);
- switch (element) {
- case 2:
- // Config pack's first value is instance_id.
- return "uintBitsToFloat(config_pack[0])";
- case 3:
- return "uintBitsToFloat(gl_VertexID)";
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return "0";
- case Attribute::Index::FrontFacing:
- // TODO(Subv): Find out what the values are for the other elements.
- ASSERT(stage == ShaderStage::Fragment);
- switch (element) {
- case 3:
- return "itof(gl_FrontFacing ? -1 : 0)";
- }
- UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return "0";
- default:
- if (attribute >= Attribute::Index::Attribute_0 &&
- attribute <= Attribute::Index::Attribute_31) {
- return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element);
- }
- break;
+ if (const auto abuf = std::get_if<AbufNode>(node)) {
+ UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry,
+ "Physical attributes in geometry shaders are not implemented");
+ if (abuf->IsPhysicalBuffer()) {
+ return fmt::format("readPhysicalAttribute(ftou({}))",
+ Visit(abuf->GetPhysicalAddress()));
}
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+ return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
+ }
- } else if (const auto cbuf = std::get_if<CbufNode>(node)) {
+ if (const auto cbuf = std::get_if<CbufNode>(node)) {
const Node offset = cbuf->GetOffset();
if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
// Direct access
@@ -556,48 +572,117 @@ private:
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
offset_imm / (4 * 4), (offset_imm / 4) % 4);
+ }
- } else if (std::holds_alternative<OperationNode>(*offset)) {
+ if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);");
+ code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset));
return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
final_offset, final_offset);
-
- } else {
- UNREACHABLE_MSG("Unmanaged offset node type");
}
- } else if (const auto gmem = std::get_if<GmemNode>(node)) {
+ UNREACHABLE_MSG("Unmanaged offset node type");
+ }
+
+ if (const auto gmem = std::get_if<GmemNode>(node)) {
const std::string real = Visit(gmem->GetRealAddress());
const std::string base = Visit(gmem->GetBaseAddress());
- const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
+ const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+ }
- } else if (const auto lmem = std::get_if<LmemNode>(node)) {
+ if (const auto lmem = std::get_if<LmemNode>(node)) {
return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
+ }
- } else if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) {
+ if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) {
return GetInternalFlag(internal_flag->GetFlag());
+ }
- } else if (const auto conditional = std::get_if<ConditionalNode>(node)) {
+ if (const auto conditional = std::get_if<ConditionalNode>(node)) {
// It's invalid to call conditional on nested nodes, use an operation instead
- code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {");
+ code.AddLine("if ({}) {{", Visit(conditional->GetCondition()));
++code.scope;
VisitBlock(conditional->GetCode());
--code.scope;
- code.AddLine('}');
+ code.AddLine("}}");
return {};
+ }
- } else if (const auto comment = std::get_if<CommentNode>(node)) {
+ if (const auto comment = std::get_if<CommentNode>(node)) {
return "// " + comment->GetText();
}
+
UNREACHABLE();
return {};
}
+ std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) {
+ const auto GeometryPass = [&](std::string_view name) {
+ if (stage == ShaderStage::Geometry && buffer) {
+ // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
+ // set an 0x80000000 index for those and the shader fails to build. Find out why
+ // this happens and what's its intent.
+ return fmt::format("gs_{}[ftou({}) % MAX_VERTEX_INPUT]", name, Visit(buffer));
+ }
+ return std::string(name);
+ };
+
+ switch (attribute) {
+ case Attribute::Index::Position:
+ if (stage != ShaderStage::Fragment) {
+ return GeometryPass("position") + GetSwizzle(element);
+ } else {
+ return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element);
+ }
+ case Attribute::Index::PointCoord:
+ switch (element) {
+ case 0:
+ return "gl_PointCoord.x";
+ case 1:
+ return "gl_PointCoord.y";
+ case 2:
+ case 3:
+ return "0";
+ }
+ UNREACHABLE();
+ return "0";
+ case Attribute::Index::TessCoordInstanceIDVertexID:
+ // TODO(Subv): Find out what the values are for the first two elements when inside a
+ // vertex shader, and what's the value of the fourth element when inside a Tess Eval
+ // shader.
+ ASSERT(stage == ShaderStage::Vertex);
+ switch (element) {
+ case 2:
+ // Config pack's first value is instance_id.
+ return "uintBitsToFloat(config_pack[0])";
+ case 3:
+ return "uintBitsToFloat(gl_VertexID)";
+ }
+ UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
+ return "0";
+ case Attribute::Index::FrontFacing:
+ // TODO(Subv): Find out what the values are for the other elements.
+ ASSERT(stage == ShaderStage::Fragment);
+ switch (element) {
+ case 3:
+ return "itof(gl_FrontFacing ? -1 : 0)";
+ }
+ UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
+ return "0";
+ default:
+ if (IsGenericAttribute(attribute)) {
+ return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element);
+ }
+ break;
+ }
+ UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
+ return "0";
+ }
+
std::string ApplyPrecise(Operation operation, const std::string& value) {
if (!IsPrecise(operation)) {
return value;
@@ -606,7 +691,7 @@ private:
const std::string precise = stage != ShaderStage::Fragment ? "precise " : "";
const std::string temporary = code.GenerateTemporary();
- code.AddLine(precise + "float " + temporary + " = " + value + ';');
+ code.AddLine("{}float {} = {};", precise, temporary, value);
return temporary;
}
@@ -620,7 +705,7 @@ private:
}
const std::string temporary = code.GenerateTemporary();
- code.AddLine("float " + temporary + " = " + Visit(operand) + ';');
+ code.AddLine("float {} = {};", temporary, Visit(operand));
return temporary;
}
@@ -635,31 +720,32 @@ private:
case Type::Float:
return value;
case Type::Int:
- return "ftoi(" + value + ')';
+ return fmt::format("ftoi({})", value);
case Type::Uint:
- return "ftou(" + value + ')';
+ return fmt::format("ftou({})", value);
case Type::HalfFloat:
- return "toHalf2(" + value + ')';
+ return fmt::format("toHalf2({})", value);
}
UNREACHABLE();
return value;
}
- std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) {
+ std::string BitwiseCastResult(const std::string& value, Type type,
+ bool needs_parenthesis = false) {
switch (type) {
case Type::Bool:
case Type::Bool2:
case Type::Float:
if (needs_parenthesis) {
- return '(' + value + ')';
+ return fmt::format("({})", value);
}
return value;
case Type::Int:
- return "itof(" + value + ')';
+ return fmt::format("itof({})", value);
case Type::Uint:
- return "utof(" + value + ')';
+ return fmt::format("utof({})", value);
case Type::HalfFloat:
- return "fromHalf2(" + value + ')';
+ return fmt::format("fromHalf2({})", value);
}
UNREACHABLE();
return value;
@@ -667,27 +753,27 @@ private:
std::string GenerateUnary(Operation operation, const std::string& func, Type result_type,
Type type_a, bool needs_parenthesis = true) {
- return ApplyPrecise(operation,
- BitwiseCastResult(func + '(' + VisitOperand(operation, 0, type_a) + ')',
- result_type, needs_parenthesis));
+ const std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0, type_a));
+
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type, needs_parenthesis));
}
std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type,
Type type_a, Type type_b) {
const std::string op_a = VisitOperand(operation, 0, type_a);
const std::string op_b = VisitOperand(operation, 1, type_b);
+ const std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
- return ApplyPrecise(
- operation, BitwiseCastResult('(' + op_a + ' ' + func + ' ' + op_b + ')', result_type));
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
}
std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type,
Type type_a, Type type_b) {
const std::string op_a = VisitOperand(operation, 0, type_a);
const std::string op_b = VisitOperand(operation, 1, type_b);
+ const std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
- return ApplyPrecise(operation,
- BitwiseCastResult(func + '(' + op_a + ", " + op_b + ')', result_type));
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
}
std::string GenerateTernary(Operation operation, const std::string& func, Type result_type,
@@ -695,10 +781,9 @@ private:
const std::string op_a = VisitOperand(operation, 0, type_a);
const std::string op_b = VisitOperand(operation, 1, type_b);
const std::string op_c = VisitOperand(operation, 2, type_c);
+ const std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
- return ApplyPrecise(
- operation,
- BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + op_c + ')', result_type));
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
}
std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
@@ -707,10 +792,9 @@ private:
const std::string op_b = VisitOperand(operation, 1, type_b);
const std::string op_c = VisitOperand(operation, 2, type_c);
const std::string op_d = VisitOperand(operation, 3, type_d);
+ const std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
- return ApplyPrecise(operation, BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " +
- op_c + ", " + op_d + ')',
- result_type));
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
}
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
@@ -773,7 +857,7 @@ private:
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
- expr += "ftoi(" + Visit(operand) + ')';
+ expr += fmt::format("ftoi({})", Visit(operand));
}
break;
case Type::Float:
@@ -806,7 +890,7 @@ private:
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else if (device.HasVariableAoffi()) {
// Avoid using variable AOFFI on unsupported devices.
- expr += "ftoi(" + Visit(operand) + ')';
+ expr += fmt::format("ftoi({})", Visit(operand));
} else {
// Insert 0 on devices not supporting variable AOFFI.
expr += '0';
@@ -831,8 +915,9 @@ private:
return {};
}
target = GetRegister(gpr->GetIndex());
-
} else if (const auto abuf = std::get_if<AbufNode>(dest)) {
+ UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
+
target = [&]() -> std::string {
switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
case Attribute::Index::Position:
@@ -840,12 +925,11 @@ private:
case Attribute::Index::PointSize:
return "gl_PointSize";
case Attribute::Index::ClipDistances0123:
- return "gl_ClipDistance[" + std::to_string(abuf->GetElement()) + ']';
+ return fmt::format("gl_ClipDistance[{}]", abuf->GetElement());
case Attribute::Index::ClipDistances4567:
- return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']';
+ return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4);
default:
- if (attribute >= Attribute::Index::Attribute_0 &&
- attribute <= Attribute::Index::Attribute_31) {
+ if (IsGenericAttribute(attribute)) {
return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
@@ -853,21 +937,18 @@ private:
return "0";
}
}();
-
} else if (const auto lmem = std::get_if<LmemNode>(dest)) {
- target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]";
-
+ target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
} else if (const auto gmem = std::get_if<GmemNode>(dest)) {
const std::string real = Visit(gmem->GetRealAddress());
const std::string base = Visit(gmem->GetBaseAddress());
- const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
+ const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
-
} else {
UNREACHABLE_MSG("Assign called without a proper target");
}
- code.AddLine(target + " = " + Visit(src) + ';');
+ code.AddLine("{} = {};", target, Visit(src));
return {};
}
@@ -920,8 +1001,9 @@ private:
const std::string condition = Visit(operation[0]);
const std::string true_case = Visit(operation[1]);
const std::string false_case = Visit(operation[2]);
- return ApplyPrecise(operation,
- '(' + condition + " ? " + true_case + " : " + false_case + ')');
+ const std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
+
+ return ApplyPrecise(operation, op_str);
}
std::string FCos(Operation operation) {
@@ -985,9 +1067,9 @@ private:
std::string ILogicalShiftRight(Operation operation) {
const std::string op_a = VisitOperand(operation, 0, Type::Uint);
const std::string op_b = VisitOperand(operation, 1, Type::Uint);
+ const std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
- return ApplyPrecise(operation,
- BitwiseCastResult("int(" + op_a + " >> " + op_b + ')', Type::Int));
+ return ApplyPrecise(operation, BitwiseCastResult(op_str, Type::Int));
}
std::string IArithmeticShiftRight(Operation operation) {
@@ -1043,11 +1125,12 @@ private:
}
std::string HNegate(Operation operation) {
- const auto GetNegate = [&](std::size_t index) -> std::string {
+ const auto GetNegate = [&](std::size_t index) {
return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1";
};
- const std::string value = '(' + VisitOperand(operation, 0, Type::HalfFloat) + " * vec2(" +
- GetNegate(1) + ", " + GetNegate(2) + "))";
+ const std::string value =
+ fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0, Type::HalfFloat),
+ GetNegate(1), GetNegate(2));
return BitwiseCastResult(value, Type::HalfFloat);
}
@@ -1055,7 +1138,8 @@ private:
const std::string value = VisitOperand(operation, 0, Type::HalfFloat);
const std::string min = VisitOperand(operation, 1, Type::Float);
const std::string max = VisitOperand(operation, 2, Type::Float);
- const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))";
+ const std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
+
return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
}
@@ -1066,34 +1150,35 @@ private:
case Tegra::Shader::HalfType::H0_H1:
return operand;
case Tegra::Shader::HalfType::F32:
- return "vec2(fromHalf2(" + operand + "))";
+ return fmt::format("vec2(fromHalf2({}))", operand);
case Tegra::Shader::HalfType::H0_H0:
- return "vec2(" + operand + "[0])";
+ return fmt::format("vec2({}[0])", operand);
case Tegra::Shader::HalfType::H1_H1:
- return "vec2(" + operand + "[1])";
+ return fmt::format("vec2({}[1])", operand);
}
UNREACHABLE();
return "0";
}();
- return "fromHalf2(" + value + ')';
+ return fmt::format("fromHalf2({})", value);
}
std::string HMergeF32(Operation operation) {
- return "float(toHalf2(" + Visit(operation[0]) + ")[0])";
+ return fmt::format("float(toHalf2({})[0])", Visit(operation[0]));
}
std::string HMergeH0(Operation operation) {
- return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" +
- Visit(operation[0]) + ")[1]))";
+ return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[1]),
+ Visit(operation[0]));
}
std::string HMergeH1(Operation operation) {
- return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[0], toHalf2(" +
- Visit(operation[1]) + ")[1]))";
+ return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[0]),
+ Visit(operation[1]));
}
std::string HPack2(Operation operation) {
- return "utof(packHalf2x16(vec2(" + Visit(operation[0]) + ", " + Visit(operation[1]) + ")))";
+ return fmt::format("utof(packHalf2x16(vec2({}, {})))", Visit(operation[0]),
+ Visit(operation[1]));
}
template <Type type>
@@ -1151,7 +1236,7 @@ private:
target = GetInternalFlag(flag->GetFlag());
}
- code.AddLine(target + " = " + Visit(src) + ';');
+ code.AddLine("{} = {};", target, Visit(src));
return {};
}
@@ -1173,7 +1258,7 @@ private:
std::string LogicalPick2(Operation operation) {
const std::string pair = VisitOperand(operation, 0, Type::Bool2);
- return pair + '[' + VisitOperand(operation, 1, Type::Uint) + ']';
+ return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
}
std::string LogicalAll2(Operation operation) {
@@ -1185,15 +1270,15 @@ private:
}
template <bool with_nan>
- std::string GenerateHalfComparison(Operation operation, std::string compare_op) {
- std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
- Type::HalfFloat, Type::HalfFloat)};
+ std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
+ const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
+ Type::HalfFloat, Type::HalfFloat)};
if constexpr (!with_nan) {
return comparison;
}
- return "halfFloatNanComparison(" + comparison + ", " +
- VisitOperand(operation, 0, Type::HalfFloat) + ", " +
- VisitOperand(operation, 1, Type::HalfFloat) + ')';
+ return fmt::format("halfFloatNanComparison({}, {}, {})", comparison,
+ VisitOperand(operation, 0, Type::HalfFloat),
+ VisitOperand(operation, 1, Type::HalfFloat));
}
template <bool with_nan>
@@ -1270,12 +1355,12 @@ private:
switch (meta->element) {
case 0:
case 1:
- return "itof(int(textureSize(" + sampler + ", " + lod + ')' +
- GetSwizzle(meta->element) + "))";
+ return fmt::format("itof(int(textureSize({}, {}){}))", sampler, lod,
+ GetSwizzle(meta->element));
case 2:
return "0";
case 3:
- return "itof(textureQueryLevels(" + sampler + "))";
+ return fmt::format("itof(textureQueryLevels({}))", sampler);
}
UNREACHABLE();
return "0";
@@ -1286,8 +1371,9 @@ private:
ASSERT(meta);
if (meta->element < 2) {
- return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
- GetSwizzle(meta->element) + "))";
+ return fmt::format("itof(int(({} * vec2(256)){}))",
+ GenerateTexture(operation, "QueryLod", {}),
+ GetSwizzle(meta->element));
}
return "0";
}
@@ -1326,7 +1412,7 @@ private:
const auto target = std::get_if<ImmediateNode>(operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine(fmt::format("jmp_to = 0x{:x}u;", target->GetValue()));
+ code.AddLine("jmp_to = 0x{:x}u;", target->GetValue());
code.AddLine("break;");
return {};
}
@@ -1335,7 +1421,7 @@ private:
const auto target = std::get_if<ImmediateNode>(operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine(fmt::format("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()));
+ code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue());
return {};
}
@@ -1361,7 +1447,7 @@ private:
UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
- code.AddLine("if (alpha_test[0] != 0) {");
+ code.AddLine("if (alpha_test[0] != 0) {{");
++code.scope;
// We start on the register containing the alpha value in the first RT.
u32 current_reg = 3;
@@ -1372,13 +1458,12 @@ private:
header.ps.IsColorComponentOutputEnabled(render_target, 1) ||
header.ps.IsColorComponentOutputEnabled(render_target, 2) ||
header.ps.IsColorComponentOutputEnabled(render_target, 3)) {
- code.AddLine(
- fmt::format("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg)));
+ code.AddLine("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg));
current_reg += 4;
}
}
--code.scope;
- code.AddLine('}');
+ code.AddLine("}}");
// Write the color outputs using the data in the shader registers, disabled
// rendertargets/components are skipped in the register assignment.
@@ -1387,8 +1472,8 @@ private:
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
- code.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
- SafeGetRegister(current_reg)));
+ code.AddLine("FragColor{}[{}] = {};", render_target, component,
+ SafeGetRegister(current_reg));
++current_reg;
}
}
@@ -1397,7 +1482,7 @@ private:
if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
- code.AddLine("gl_FragDepth = " + SafeGetRegister(current_reg + 1) + ';');
+ code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1));
}
code.AddLine("return;");
@@ -1407,11 +1492,11 @@ private:
std::string Discard(Operation operation) {
// Enclose "discard" in a conditional, so that GLSL compilation does not complain
// about unexecuted instructions that may follow this.
- code.AddLine("if (true) {");
+ code.AddLine("if (true) {{");
++code.scope;
code.AddLine("discard;");
--code.scope;
- code.AddLine("}");
+ code.AddLine("}}");
return {};
}
@@ -1591,15 +1676,11 @@ private:
}
std::string GetInputAttribute(Attribute::Index attribute) const {
- const auto index{static_cast<u32>(attribute) -
- static_cast<u32>(Attribute::Index::Attribute_0)};
- return GetDeclarationWithSuffix(index, "input_attr");
+ return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr");
}
std::string GetOutputAttribute(Attribute::Index attribute) const {
- const auto index{static_cast<u32>(attribute) -
- static_cast<u32>(Attribute::Index::Attribute_0)};
- return GetDeclarationWithSuffix(index, "output_attr");
+ return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr");
}
std::string GetConstBuffer(u32 index) const {
@@ -1629,7 +1710,7 @@ private:
const auto index = static_cast<u32>(flag);
ASSERT(index < static_cast<u32>(InternalFlag::Amount));
- return std::string(InternalFlagNames[index]) + '_' + suffix;
+ return fmt::format("{}_{}", InternalFlagNames[index], suffix);
}
std::string GetSampler(const Sampler& sampler) const {
@@ -1637,7 +1718,20 @@ private:
}
std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const {
- return name + '_' + std::to_string(index) + '_' + suffix;
+ return fmt::format("{}_{}_{}", name, index, suffix);
+ }
+
+ u32 GetNumPhysicalInputAttributes() const {
+ return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
+ }
+
+ u32 GetNumPhysicalAttributes() const {
+ return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
+ }
+
+ u32 GetNumPhysicalVaryings() const {
+ return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION,
+ Maxwell::NumVaryings);
}
const Device& device;
@@ -1652,24 +1746,25 @@ private:
} // Anonymous namespace
std::string GetCommonDeclarations() {
- const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
- return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
- "#define ftoi floatBitsToInt\n"
- "#define ftou floatBitsToUint\n"
- "#define itof intBitsToFloat\n"
- "#define utof uintBitsToFloat\n\n"
- "float fromHalf2(vec2 pair) {\n"
- " return utof(packHalf2x16(pair));\n"
- "}\n\n"
- "vec2 toHalf2(float value) {\n"
- " return unpackHalf2x16(ftou(value));\n"
- "}\n\n"
- "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n"
- " bvec2 is_nan1 = isnan(pair1);\n"
- " bvec2 is_nan2 = isnan(pair2);\n"
- " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
- "is_nan2.y);\n"
- "}\n";
+ return fmt::format(
+ "#define MAX_CONSTBUFFER_ELEMENTS {}\n"
+ "#define ftoi floatBitsToInt\n"
+ "#define ftou floatBitsToUint\n"
+ "#define itof intBitsToFloat\n"
+ "#define utof uintBitsToFloat\n\n"
+ "float fromHalf2(vec2 pair) {{\n"
+ " return utof(packHalf2x16(pair));\n"
+ "}}\n\n"
+ "vec2 toHalf2(float value) {{\n"
+ " return unpackHalf2x16(ftou(value));\n"
+ "}}\n\n"
+ "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
+ " bvec2 is_nan1 = isnan(pair1);\n"
+ " bvec2 is_nan2 = isnan(pair2);\n"
+ " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
+ "is_nan2.y);\n"
+ "}}\n",
+ MAX_CONSTBUFFER_ELEMENTS);
}
ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage,
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 254c0d499..fba9c594a 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -104,8 +104,9 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
return true;
}
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system)
- : system{system}, precompiled_cache_virtual_file_offset{0} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+
+ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
ShaderDiskCacheOpenGL::LoadTransferable() {
@@ -243,7 +244,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
- const auto entry = LoadDecompiledEntry();
+ auto entry = LoadDecompiledEntry();
if (!entry) {
return {};
}
@@ -287,13 +288,13 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
return {};
}
- std::vector<u8> code(code_size);
+ std::string code(code_size, '\0');
if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
return {};
}
ShaderDiskCacheDecompiled entry;
- entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
+ entry.code = std::move(code);
u32 const_buffers_count{};
if (!LoadObjectFromPrecompiled(const_buffers_count)) {
@@ -303,12 +304,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (u32 i = 0; i < const_buffers_count; ++i) {
u32 max_offset{};
u32 index{};
- u8 is_indirect{};
+ bool is_indirect{};
if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(is_indirect)) {
return {};
}
- entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
+ entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
}
u32 samplers_count{};
@@ -320,18 +321,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u64 offset{};
u64 index{};
u32 type{};
- u8 is_array{};
- u8 is_shadow{};
- u8 is_bindless{};
+ bool is_array{};
+ bool is_shadow{};
+ bool is_bindless{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
!LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
!LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
return {};
}
- entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
- static_cast<std::size_t>(index),
- static_cast<Tegra::Shader::TextureType>(type),
- is_array != 0, is_shadow != 0, is_bindless != 0);
+ entry.entries.samplers.emplace_back(
+ static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
+ static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
}
u32 global_memory_count{};
@@ -342,21 +342,20 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
for (u32 i = 0; i < global_memory_count; ++i) {
u32 cbuf_index{};
u32 cbuf_offset{};
- u8 is_read{};
- u8 is_written{};
+ bool is_read{};
+ bool is_written{};
if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
!LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
return {};
}
- entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
- is_written != 0);
+ entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
+ is_written);
}
for (auto& clip_distance : entry.entries.clip_distances) {
- u8 clip_distance_raw{};
- if (!LoadObjectFromPrecompiled(clip_distance_raw))
+ if (!LoadObjectFromPrecompiled(clip_distance)) {
return {};
- clip_distance = clip_distance_raw != 0;
+ }
}
u64 shader_length{};
@@ -384,7 +383,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& cbuf : entries.const_buffers) {
if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(cbuf.IsIndirect())) {
return false;
}
}
@@ -396,9 +395,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(sampler.IsArray()) ||
+ !SaveObjectToPrecompiled(sampler.IsShadow()) ||
+ !SaveObjectToPrecompiled(sampler.IsBindless())) {
return false;
}
}
@@ -409,14 +408,13 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
for (const auto& gmem : entries.global_memory_entries) {
if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) ||
- !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
return false;
}
}
for (const bool clip_distance : entries.clip_distances) {
- if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 1 : 0))) {
+ if (!SaveObjectToPrecompiled(clip_distance)) {
return false;
}
}
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 0142b2e3b..2da0a4a23 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -70,14 +70,14 @@ namespace std {
template <>
struct hash<OpenGL::BaseBindings> {
- std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
+ std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
}
};
template <>
struct hash<OpenGL::ShaderDiskCacheUsage> {
- std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
+ std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
return static_cast<std::size_t>(usage.unique_identifier) ^
std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
}
@@ -162,6 +162,7 @@ struct ShaderDiskCacheDump {
class ShaderDiskCacheOpenGL {
public:
explicit ShaderDiskCacheOpenGL(Core::System& system);
+ ~ShaderDiskCacheOpenGL();
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
@@ -259,20 +260,35 @@ private:
return SaveArrayToPrecompiled(&object, 1);
}
+ bool SaveObjectToPrecompiled(bool object) {
+ const auto value = static_cast<u8>(object);
+ return SaveArrayToPrecompiled(&value, 1);
+ }
+
template <typename T>
bool LoadObjectFromPrecompiled(T& object) {
return LoadArrayFromPrecompiled(&object, 1);
}
- // Copre system
+ bool LoadObjectFromPrecompiled(bool& object) {
+ u8 value;
+ const bool read_ok = LoadArrayFromPrecompiled(&value, 1);
+ if (!read_ok) {
+ return false;
+ }
+
+ object = value != 0;
+ return true;
+ }
+
+ // Core system
Core::System& system;
// Stored transferable shaders
std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
- // Stores whole precompiled cache which will be read from or saved to the precompiled chache
- // file
+ // Stores whole precompiled cache which will be read from/saved to the precompiled cache file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
// Stores the current offset of the precompiled cache file for IO purposes
- std::size_t precompiled_cache_virtual_file_offset;
+ std::size_t precompiled_cache_virtual_file_offset = 0;
// The cache has been loaded at boot
bool tried_to_load{};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 6abf948f8..7ab0b4553 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -33,14 +33,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
};
)";
- ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+ const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
out += program.first;
if (setup.IsDualProgram()) {
- ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
+ const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
ProgramResult program_b =
Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
@@ -76,7 +76,7 @@ void main() {
}
})";
- return {out, program.second};
+ return {std::move(out), std::move(program.second)};
}
ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
@@ -97,7 +97,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
};
)";
- ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+ const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
out += program.first;
@@ -107,7 +107,7 @@ void main() {
execute_geometry();
};)";
- return {out, program.second};
+ return {std::move(out), std::move(program.second)};
}
ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
@@ -160,7 +160,7 @@ bool AlphaFunc(in float value) {
}
)";
- ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+ const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
@@ -172,7 +172,7 @@ void main() {
}
)";
- return {out, program.second};
+ return {std::move(out), std::move(program.second)};
}
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 95b773135..ed7b5cff0 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -126,6 +126,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_TRIANGLES;
case Maxwell::PrimitiveTopology::TriangleStrip:
return GL_TRIANGLE_STRIP;
+ case Maxwell::PrimitiveTopology::TriangleFan:
+ return GL_TRIANGLE_FAN;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();