From c67d64365a712830fe140dd36e24e2efd9b8a812 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 16 Feb 2021 20:52:12 -0300 Subject: shader: Remove old shader management --- .../renderer_opengl/gl_arb_decompiler.cpp | 2124 -------------- src/video_core/renderer_opengl/gl_arb_decompiler.h | 29 - src/video_core/renderer_opengl/gl_rasterizer.cpp | 314 +- src/video_core/renderer_opengl/gl_rasterizer.h | 33 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 564 +--- src/video_core/renderer_opengl/gl_shader_cache.h | 102 +- .../renderer_opengl/gl_shader_decompiler.cpp | 2986 -------------------- .../renderer_opengl/gl_shader_decompiler.h | 69 - .../renderer_opengl/gl_shader_disk_cache.cpp | 482 ---- .../renderer_opengl/gl_shader_disk_cache.h | 176 -- 10 files changed, 8 insertions(+), 6871 deletions(-) delete mode 100644 src/video_core/renderer_opengl/gl_arb_decompiler.cpp delete mode 100644 src/video_core/renderer_opengl/gl_arb_decompiler.h delete mode 100644 src/video_core/renderer_opengl/gl_shader_decompiler.cpp delete mode 100644 src/video_core/renderer_opengl/gl_shader_decompiler.h delete mode 100644 src/video_core/renderer_opengl/gl_shader_disk_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_shader_disk_cache.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp deleted file mode 100644 index e8d8d2aa5..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ /dev/null @@ -1,2124 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -// Predicates in the decompiled code follow the convention that -1 means true and 0 means false. -// GLASM lacks booleans, so they have to be implemented as integers. -// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to -// select between two values, because -1 will be evaluated as true and 0 as false. - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using namespace VideoCommon::Shader; -using Operation = const OperationNode&; - -constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; - -char Swizzle(std::size_t component) { - static constexpr std::string_view SWIZZLE{"xyzw"}; - return SWIZZLE.at(component); -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast(index) - static_cast(Attribute::Index::Attribute_0); -} - -std::string_view Modifiers(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - if (meta && meta->precise) { - return ".PREC"; - } - return ""; -} - -std::string_view GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return ""; - case PixelImap::Constant: - return "FLAT "; - case PixelImap::ScreenLinear: - return "NOPERSPECTIVE "; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown 
attribute usage index={}", attribute); - return {}; -} - -std::string_view ImageType(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "BUFFER"; - case Tegra::Shader::ImageType::Texture1DArray: - return "ARRAY1D"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "ARRAY2D"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - } - UNREACHABLE(); - return {}; -} - -std::string_view StackName(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "SSY"; - case MetaStackClass::Pbk: - return "PBK"; - } - UNREACHABLE(); - return ""; -}; - -std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { - switch (topology) { - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: - return "POINTS"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: - return "LINES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: - return "LINES_ADJACENCY"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - return "TRIANGLES"; - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - return "TRIANGLES_ADJACENCY"; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return "POINTS"; - } -} - -std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "POINTS"; - 
case Tegra::Shader::OutputTopology::LineStrip: - return "LINE_STRIP"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "TRIANGLE_STRIP"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -std::string_view StageInputName(ShaderType stage) { - switch (stage) { - case ShaderType::Vertex: - case ShaderType::Geometry: - return "vertex"; - case ShaderType::Fragment: - return "fragment"; - case ShaderType::Compute: - return "invocation"; - default: - UNREACHABLE(); - return ""; - } -} - -std::string TextureType(const MetaTexture& meta) { - if (meta.sampler.is_buffer) { - return "BUFFER"; - } - std::string type; - if (meta.sampler.is_shadow) { - type += "SHADOW"; - } - if (meta.sampler.is_array) { - type += "ARRAY"; - } - type += [&meta] { - switch (meta.sampler.type) { - case Tegra::Shader::TextureType::Texture1D: - return "1D"; - case Tegra::Shader::TextureType::Texture2D: - return "2D"; - case Tegra::Shader::TextureType::Texture3D: - return "3D"; - case Tegra::Shader::TextureType::TextureCube: - return "CUBE"; - } - UNREACHABLE(); - return "2D"; - }(); - return type; -} - -class ARBDecompiler final { -public: - explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier); - - std::string Code() const { - return shader_source; - } - -private: - void DefineGlobalMemory(); - - void DeclareHeader(); - void DeclareVertex(); - void DeclareGeometry(); - void DeclareFragment(); - void DeclareCompute(); - void DeclareInputAttributes(); - void DeclareOutputAttributes(); - void DeclareLocalMemory(); - void DeclareGlobalMemory(); - void DeclareConstantBuffers(); - void DeclareRegisters(); - void DeclareTemporaries(); - void DeclarePredicates(); - void DeclareInternalFlags(); - - void InitializeVariables(); - - void DecompileAST(); - void DecompileBranchMode(); - - void VisitAST(const ASTNode& node); - std::string VisitExpression(const 
Expr& node); - - void VisitBlock(const NodeBlock& bb); - - std::string Visit(const Node& node); - - std::tuple BuildCoords(Operation); - std::string BuildAoffi(Operation); - std::string GlobalMemoryPointer(const GmemNode& gmem); - void Exit(); - - std::string Assign(Operation); - std::string Select(Operation); - std::string FClamp(Operation); - std::string FCastHalf0(Operation); - std::string FCastHalf1(Operation); - std::string FSqrt(Operation); - std::string FSwizzleAdd(Operation); - std::string HAdd2(Operation); - std::string HMul2(Operation); - std::string HFma2(Operation); - std::string HAbsolute(Operation); - std::string HNegate(Operation); - std::string HClamp(Operation); - std::string HCastFloat(Operation); - std::string HUnpack(Operation); - std::string HMergeF32(Operation); - std::string HMergeH0(Operation); - std::string HMergeH1(Operation); - std::string HPack2(Operation); - std::string LogicalAssign(Operation); - std::string LogicalPick2(Operation); - std::string LogicalAnd2(Operation); - std::string FloatOrdered(Operation); - std::string FloatUnordered(Operation); - std::string LogicalAddCarry(Operation); - std::string Texture(Operation); - std::string TextureGather(Operation); - std::string TextureQueryDimensions(Operation); - std::string TextureQueryLod(Operation); - std::string TexelFetch(Operation); - std::string TextureGradient(Operation); - std::string ImageLoad(Operation); - std::string ImageStore(Operation); - std::string Branch(Operation); - std::string BranchIndirect(Operation); - std::string PushFlowStack(Operation); - std::string PopFlowStack(Operation); - std::string Exit(Operation); - std::string Discard(Operation); - std::string EmitVertex(Operation); - std::string EndPrimitive(Operation); - std::string InvocationId(Operation); - std::string YNegate(Operation); - std::string ThreadId(Operation); - std::string ShuffleIndexed(Operation); - std::string Barrier(Operation); - std::string MemoryBarrierGroup(Operation); - std::string 
MemoryBarrierGlobal(Operation); - - template - std::string Unary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); - return temporary; - } - - template - std::string Binary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1])); - return temporary; - } - - template - std::string Trinary(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), - Visit(operation[1]), Visit(operation[2])); - return temporary; - } - - template - std::string FloatComparison(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("TRUNC.U.CC RC.x, {};", Binary(operation)); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NE.x), -1;", temporary); - - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - if constexpr (unordered) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), -1;", temporary); - } else if (op == SNE_F) { - AddLine("SNE.F RC.x, {}, {};", op_a, op_a); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - AddLine("SNE.F RC.x, {}, {};", op_b, op_b); - AddLine("TRUNC.U.CC RC.x, RC.x;"); - AddLine("MOV.S {} (NE.x), 0;", temporary); - } - return temporary; - } - - template - std::string HalfComparison(Operation operation) { - std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string op_a = Visit(operation[0]); - const std::string op_b = Visit(operation[1]); - AddLine("UP2H.F {}, {};", tmp1, op_a); - 
AddLine("UP2H.F {}, {};", tmp2, op_b); - AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); - AddLine("TRUNC.U.CC RC.xy, {};", tmp1); - AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); - AddLine("MOV.S {}.x (NE.x), -1;", tmp1); - AddLine("MOV.S {}.y (NE.y), -1;", tmp1); - if constexpr (is_nan) { - AddLine("MOVC.F RC.x, {};", op_a); - AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); - AddLine("MOVC.F RC.x, {};", op_b); - AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); - } - return tmp1; - } - - template - std::string AtomicImage(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - - AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord, - image_id, ImageType(meta.image.type)); - return fmt::format("{}.x", coord); - } - - template - std::string Atomic(Operation operation) { - std::string temporary = AllocTemporary(); - std::string address; - std::string_view opname; - bool robust = false; - if (const auto gmem = std::get_if(&*operation[0])) { - address = GlobalMemoryPointer(*gmem); - opname = "ATOM"; - robust = true; - } else if (const auto smem = std::get_if(&*operation[0])) { - address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); - opname = "ATOMS"; - } else { - UNREACHABLE(); - return "{0, 0, 0, 0}"; - } - if (robust) { - AddLine("IF NE.x;"); - } - AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); - if (robust) { - AddLine("ELSE;"); 
- AddLine("MOV.S {}, 0;", temporary); - AddLine("ENDIF;"); - } - return temporary; - } - - template - std::string Negate(Operation operation) { - std::string temporary = AllocTemporary(); - if constexpr (type == 'F') { - AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); - } else { - AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); - } - return temporary; - } - - template - std::string Absolute(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); - return temporary; - } - - template - std::string BitfieldInsert(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); - AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), - Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template - std::string BitfieldExtract(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); - AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); - AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); - } - - template - std::string LocalInvocationId(Operation) { - return fmt::format("invocation.localid.{}", swizzle); - } - - template - std::string WorkGroupId(Operation) { - return fmt::format("invocation.groupid.{}", swizzle); - } - - template - std::string ThreadMask(Operation) { - return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); - } - - template - void AddExpression(std::string_view text, Args&&... args) { - shader_source += fmt::format(fmt::runtime(text), std::forward(args)...); - } - - template - void AddLine(std::string_view text, Args&&... 
args) { - AddExpression(text, std::forward(args)...); - shader_source += '\n'; - } - - std::string AllocLongVectorTemporary() { - max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); - return fmt::format("L{}", num_long_temporaries++); - } - - std::string AllocLongTemporary() { - return fmt::format("{}.x", AllocLongVectorTemporary()); - } - - std::string AllocVectorTemporary() { - max_temporaries = std::max(max_temporaries, num_temporaries + 1); - return fmt::format("T{}", num_temporaries++); - } - - std::string AllocTemporary() { - return fmt::format("{}.x", AllocVectorTemporary()); - } - - void ResetTemporaries() noexcept { - num_temporaries = 0; - num_long_temporaries = 0; - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - - std::size_t num_temporaries = 0; - std::size_t max_temporaries = 0; - - std::size_t num_long_temporaries = 0; - std::size_t max_long_temporaries = 0; - - std::map global_memory_names; - - std::string shader_source; - - static constexpr std::string_view ADD_F32 = "ADD.F32"; - static constexpr std::string_view ADD_S = "ADD.S"; - static constexpr std::string_view ADD_U = "ADD.U"; - static constexpr std::string_view MUL_F32 = "MUL.F32"; - static constexpr std::string_view MUL_S = "MUL.S"; - static constexpr std::string_view MUL_U = "MUL.U"; - static constexpr std::string_view DIV_F32 = "DIV.F32"; - static constexpr std::string_view DIV_S = "DIV.S"; - static constexpr std::string_view DIV_U = "DIV.U"; - static constexpr std::string_view MAD_F32 = "MAD.F32"; - static constexpr std::string_view RSQ_F32 = "RSQ.F32"; - static constexpr std::string_view COS_F32 = "COS.F32"; - static constexpr std::string_view SIN_F32 = "SIN.F32"; - static constexpr std::string_view EX2_F32 = "EX2.F32"; - static constexpr std::string_view LG2_F32 = "LG2.F32"; - static constexpr std::string_view SLT_F = "SLT.F32"; - static constexpr std::string_view SLT_S = "SLT.S"; - static constexpr 
std::string_view SLT_U = "SLT.U"; - static constexpr std::string_view SEQ_F = "SEQ.F32"; - static constexpr std::string_view SEQ_S = "SEQ.S"; - static constexpr std::string_view SEQ_U = "SEQ.U"; - static constexpr std::string_view SLE_F = "SLE.F32"; - static constexpr std::string_view SLE_S = "SLE.S"; - static constexpr std::string_view SLE_U = "SLE.U"; - static constexpr std::string_view SGT_F = "SGT.F32"; - static constexpr std::string_view SGT_S = "SGT.S"; - static constexpr std::string_view SGT_U = "SGT.U"; - static constexpr std::string_view SNE_F = "SNE.F32"; - static constexpr std::string_view SNE_S = "SNE.S"; - static constexpr std::string_view SNE_U = "SNE.U"; - static constexpr std::string_view SGE_F = "SGE.F32"; - static constexpr std::string_view SGE_S = "SGE.S"; - static constexpr std::string_view SGE_U = "SGE.U"; - static constexpr std::string_view AND_S = "AND.S"; - static constexpr std::string_view AND_U = "AND.U"; - static constexpr std::string_view TRUNC_F = "TRUNC.F"; - static constexpr std::string_view TRUNC_S = "TRUNC.S"; - static constexpr std::string_view TRUNC_U = "TRUNC.U"; - static constexpr std::string_view SHL_S = "SHL.S"; - static constexpr std::string_view SHL_U = "SHL.U"; - static constexpr std::string_view SHR_S = "SHR.S"; - static constexpr std::string_view SHR_U = "SHR.U"; - static constexpr std::string_view OR_S = "OR.S"; - static constexpr std::string_view OR_U = "OR.U"; - static constexpr std::string_view XOR_S = "XOR.S"; - static constexpr std::string_view XOR_U = "XOR.U"; - static constexpr std::string_view NOT_S = "NOT.S"; - static constexpr std::string_view NOT_U = "NOT.U"; - static constexpr std::string_view BTC_S = "BTC.S"; - static constexpr std::string_view BTC_U = "BTC.U"; - static constexpr std::string_view BTFM_S = "BTFM.S"; - static constexpr std::string_view BTFM_U = "BTFM.U"; - static constexpr std::string_view ROUND_F = "ROUND.F"; - static constexpr std::string_view CEIL_F = "CEIL.F"; - static constexpr 
std::string_view FLR_F = "FLR.F"; - static constexpr std::string_view I2F_S = "I2F.S"; - static constexpr std::string_view I2F_U = "I2F.U"; - static constexpr std::string_view MIN_F = "MIN.F"; - static constexpr std::string_view MIN_S = "MIN.S"; - static constexpr std::string_view MIN_U = "MIN.U"; - static constexpr std::string_view MAX_F = "MAX.F"; - static constexpr std::string_view MAX_S = "MAX.S"; - static constexpr std::string_view MAX_U = "MAX.U"; - static constexpr std::string_view MOV_U = "MOV.U"; - static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; - static constexpr std::string_view TGALL_U = "TGALL.U"; - static constexpr std::string_view TGANY_U = "TGANY.U"; - static constexpr std::string_view TGEQ_U = "TGEQ.U"; - static constexpr std::string_view EXCH = "EXCH"; - static constexpr std::string_view ADD = "ADD"; - static constexpr std::string_view MIN = "MIN"; - static constexpr std::string_view MAX = "MAX"; - static constexpr std::string_view AND = "AND"; - static constexpr std::string_view OR = "OR"; - static constexpr std::string_view XOR = "XOR"; - static constexpr std::string_view U32 = "U32"; - static constexpr std::string_view S32 = "S32"; - - static constexpr std::size_t NUM_ENTRIES = static_cast(OperationCode::Amount); - using DecompilerType = std::string (ARBDecompiler::*)(Operation); - static constexpr std::array OPERATION_DECOMPILERS = { - &ARBDecompiler::Assign, - - &ARBDecompiler::Select, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Trinary, - &ARBDecompiler::Negate<'F'>, - &ARBDecompiler::Absolute<'F'>, - &ARBDecompiler::FClamp, - &ARBDecompiler::FCastHalf0, - &ARBDecompiler::FCastHalf1, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::FSqrt, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - 
&ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::FSwizzleAdd, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Negate<'S'>, - &ARBDecompiler::Absolute<'S'>, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::BitfieldInsert<'S'>, - &ARBDecompiler::BitfieldExtract<'S'>, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::BitfieldInsert<'U'>, - &ARBDecompiler::BitfieldExtract<'U'>, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::HAdd2, - &ARBDecompiler::HMul2, - &ARBDecompiler::HFma2, - &ARBDecompiler::HAbsolute, - &ARBDecompiler::HNegate, - &ARBDecompiler::HClamp, - &ARBDecompiler::HCastFloat, - &ARBDecompiler::HUnpack, - &ARBDecompiler::HMergeF32, - &ARBDecompiler::HMergeH0, - &ARBDecompiler::HMergeH1, - &ARBDecompiler::HPack2, - - &ARBDecompiler::LogicalAssign, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Unary, - &ARBDecompiler::LogicalPick2, - &ARBDecompiler::LogicalAnd2, - - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatOrdered, - &ARBDecompiler::FloatUnordered, - &ARBDecompiler::FloatComparison, - 
&ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - &ARBDecompiler::FloatComparison, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - &ARBDecompiler::Binary, - - &ARBDecompiler::LogicalAddCarry, - - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - &ARBDecompiler::HalfComparison, - - &ARBDecompiler::Texture, - &ARBDecompiler::Texture, - &ARBDecompiler::TextureGather, - &ARBDecompiler::TextureQueryDimensions, - &ARBDecompiler::TextureQueryLod, - &ARBDecompiler::TexelFetch, - &ARBDecompiler::TextureGradient, - - &ARBDecompiler::ImageLoad, - &ARBDecompiler::ImageStore, - - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - &ARBDecompiler::AtomicImage, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - 
&ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - &ARBDecompiler::Atomic, - - &ARBDecompiler::Branch, - &ARBDecompiler::BranchIndirect, - &ARBDecompiler::PushFlowStack, - &ARBDecompiler::PopFlowStack, - &ARBDecompiler::Exit, - &ARBDecompiler::Discard, - - &ARBDecompiler::EmitVertex, - &ARBDecompiler::EndPrimitive, - - &ARBDecompiler::InvocationId, - &ARBDecompiler::YNegate, - &ARBDecompiler::LocalInvocationId<'x'>, - &ARBDecompiler::LocalInvocationId<'y'>, - &ARBDecompiler::LocalInvocationId<'z'>, - &ARBDecompiler::WorkGroupId<'x'>, - &ARBDecompiler::WorkGroupId<'y'>, - &ARBDecompiler::WorkGroupId<'z'>, - - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - &ARBDecompiler::Unary, - - &ARBDecompiler::ThreadId, - &ARBDecompiler::ThreadMask<'e', 'q'>, - &ARBDecompiler::ThreadMask<'g', 'e'>, - &ARBDecompiler::ThreadMask<'g', 't'>, - &ARBDecompiler::ThreadMask<'l', 'e'>, - &ARBDecompiler::ThreadMask<'l', 't'>, - &ARBDecompiler::ShuffleIndexed, - - &ARBDecompiler::Barrier, - &ARBDecompiler::MemoryBarrierGroup, - &ARBDecompiler::MemoryBarrierGlobal, - }; -}; - -ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} { - DefineGlobalMemory(); - - AddLine("TEMP RC;"); - AddLine("TEMP FSWZA[4];"); - AddLine("TEMP FSWZB[4];"); - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - AddLine("END"); - - const std::string code = std::move(shader_source); - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareLocalMemory(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareRegisters(); - DeclareTemporaries(); - DeclarePredicates(); - DeclareInternalFlags(); - - shader_source += code; -} - -std::string_view HeaderStageName(ShaderType stage) { - 
switch (stage) { - case ShaderType::Vertex: - return "vp"; - case ShaderType::Geometry: - return "gp"; - case ShaderType::Fragment: - return "fp"; - case ShaderType::Compute: - return "cp"; - default: - UNREACHABLE(); - return ""; - } -} - -void ARBDecompiler::DefineGlobalMemory() { - u32 binding = 0; - for (const auto& pair : ir.GetGlobalMemory()) { - const GlobalMemoryBase base = pair.first; - global_memory_names.emplace(base, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareHeader() { - AddLine("!!NV{}5.0", HeaderStageName(stage)); - // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D - AddLine("OPTION NV_internal;"); - AddLine("OPTION NV_gpu_program_fp64;"); - AddLine("OPTION NV_shader_thread_group;"); - if (ir.UsesWarps() && device.HasWarpIntrinsics()) { - AddLine("OPTION NV_shader_thread_shuffle;"); - } - if (stage == ShaderType::Vertex) { - if (device.HasNvViewportArray2()) { - AddLine("OPTION NV_viewport_array2;"); - } - } - if (stage == ShaderType::Fragment) { - AddLine("OPTION ARB_draw_buffers;"); - } - if (device.HasImageLoadFormatted()) { - AddLine("OPTION EXT_shader_image_load_formatted;"); - } -} - -void ARBDecompiler::DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); -} - -void ARBDecompiler::DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - const auto& info = registry.GetGraphicsInfo(); - const auto& header = ir.GetHeader(); - AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); - AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); - AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); - AddLine("ATTRIB vertex_position = vertex.position;"); -} - -void ARBDecompiler::DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - AddLine("OUTPUT result_color7 = result.color[7];"); - AddLine("OUTPUT result_color6 = 
result.color[6];"); - AddLine("OUTPUT result_color5 = result.color[5];"); - AddLine("OUTPUT result_color4 = result.color[4];"); - AddLine("OUTPUT result_color3 = result.color[3];"); - AddLine("OUTPUT result_color2 = result.color[2];"); - AddLine("OUTPUT result_color1 = result.color[1];"); - AddLine("OUTPUT result_color0 = result.color;"); -} - -void ARBDecompiler::DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const ComputeInfo& info = registry.GetComputeInfo(); - AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], - info.workgroup_size[2]); - if (info.shared_memory_size_in_words == 0) { - return; - } - const u32 limit = device.GetMaxComputeSharedMemorySize(); - u32 size_in_bytes = info.shared_memory_size_in_words * 4; - if (size_in_bytes > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size_in_bytes, limit); - size_in_bytes = limit; - } - - AddLine("SHARED_MEMORY {};", size_in_bytes); - AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); -} - -void ARBDecompiler::DeclareInputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - const std::string_view stage_name = StageInputName(stage); - for (const auto attribute : ir.GetInputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - - std::string_view suffix; - if (stage == ShaderType::Fragment) { - const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix = GetInputFlags(input_mode); - } - AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, - index); - } -} - -void ARBDecompiler::DeclareOutputAttributes() { - if (stage == ShaderType::Compute) { - return; - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = 
GetGenericAttributeIndex(attribute); - AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); - } -} - -void ARBDecompiler::DeclareLocalMemory() { - u64 size = 0; - if (stage == ShaderType::Compute) { - size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - size = ir.GetHeader().GetLocalMemorySize(); - } - if (size == 0) { - return; - } - const u64 element_count = Common::AlignUp(size, 4) / 4; - AddLine("TEMP lmem[{}];", element_count); -} - -void ARBDecompiler::DeclareGlobalMemory() { - const size_t num_entries = ir.GetGlobalMemory().size(); - if (num_entries > 0) { - AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); - } -} - -void ARBDecompiler::DeclareConstantBuffers() { - u32 binding = 0; - for (const auto& cbuf : ir.GetConstantBuffers()) { - AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); - ++binding; - } -} - -void ARBDecompiler::DeclareRegisters() { - for (const u32 gpr : ir.GetRegisters()) { - AddLine("TEMP R{};", gpr); - } -} - -void ARBDecompiler::DeclareTemporaries() { - for (std::size_t i = 0; i < max_temporaries; ++i) { - AddLine("TEMP T{};", i); - } - for (std::size_t i = 0; i < max_long_temporaries; ++i) { - AddLine("LONG TEMP L{};", i); - } -} - -void ARBDecompiler::DeclarePredicates() { - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("TEMP P{};", static_cast(pred)); - } -} - -void ARBDecompiler::DeclareInternalFlags() { - for (const char* name : INTERNAL_FLAG_NAMES) { - AddLine("TEMP {};", name); - } -} - -void ARBDecompiler::InitializeVariables() { - AddLine("MOV.F32 FSWZA[0], -1;"); - AddLine("MOV.F32 FSWZA[1], 1;"); - AddLine("MOV.F32 FSWZA[2], -1;"); - AddLine("MOV.F32 FSWZA[3], 0;"); - AddLine("MOV.F32 FSWZB[0], -1;"); - AddLine("MOV.F32 FSWZB[1], -1;"); - AddLine("MOV.F32 FSWZB[2], 1;"); - AddLine("MOV.F32 FSWZB[3], -1;"); - - if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { - 
AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); - } - for (const auto attribute : ir.GetOutputAttributes()) { - if (!IsGenericAttribute(attribute)) { - continue; - } - const u32 index = GetGenericAttributeIndex(attribute); - AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); - } - for (const u32 gpr : ir.GetRegisters()) { - AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); - } - for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { - AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast(pred)); - } -} - -void ARBDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("TEMP F{};", i); - } - for (u32 i = 0; i < num_flow_variables; ++i) { - AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); - } - - InitializeVariables(); - - VisitAST(ir.GetASTProgram()); -} - -void ARBDecompiler::DecompileBranchMode() { - static constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); - AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); - AddLine("TEMP SSY_TOP;"); - AddLine("TEMP PBK_TOP;"); - } - - AddLine("TEMP PC;"); - - if (!ir.IsFlowStackDisabled()) { - AddLine("MOV.U SSY_TOP.x, 0;"); - AddLine("MOV.U PBK_TOP.x, 0;"); - } - - InitializeVariables(); - - const auto basic_block_end = ir.GetBasicBlocks().end(); - auto basic_block_it = ir.GetBasicBlocks().begin(); - const u32 first_address = basic_block_it->first; - AddLine("MOV.U PC.x, {};", first_address); - - AddLine("REP;"); - - std::size_t num_blocks = 0; - while (basic_block_it != basic_block_end) { - const auto& [address, bb] = *basic_block_it; - ++num_blocks; - - AddLine("SEQ.S.CC RC.x, PC.x, {};", address); - AddLine("IF NE.x;"); - - VisitBlock(bb); - - ++basic_block_it; - - if (basic_block_it != basic_block_end) { - const auto op = std::get_if(&*bb[bb.size() - 1]); - if (!op || op->GetCode() != OperationCode::Branch) { - const u32 next_address = basic_block_it->first; - AddLine("MOV.U 
PC.x, {};", next_address); - AddLine("CONT;"); - } - } - - AddLine("ELSE;"); - } - AddLine("RET;"); - while (num_blocks--) { - AddLine("ENDIF;"); - } - - AddLine("ENDREP;"); -} - -void ARBDecompiler::VisitAST(const ASTNode& node) { - if (const auto ast = std::get_if(&*node->GetInnerData())) { - for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto if_then = std::get_if(&*node->GetInnerData())) { - const std::string condition = VisitExpression(if_then->condition); - ResetTemporaries(); - - AddLine("MOVC.U RC.x, {};", condition); - AddLine("IF NE.x;"); - for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("ENDIF;"); - } else if (const auto if_else = std::get_if(&*node->GetInnerData())) { - AddLine("ELSE;"); - for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - } else if (const auto decoded = std::get_if(&*node->GetInnerData())) { - VisitBlock(decoded->nodes); - } else if (const auto var_set = std::get_if(&*node->GetInnerData())) { - AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition)); - ResetTemporaries(); - } else if (const auto do_while = std::get_if(&*node->GetInnerData())) { - const std::string condition = VisitExpression(do_while->condition); - ResetTemporaries(); - AddLine("REP;"); - for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) { - VisitAST(current); - } - AddLine("MOVC.U RC.x, {};", condition); - AddLine("BRK (NE.x);"); - AddLine("ENDREP;"); - } else if (const auto ast_return = std::get_if(&*node->GetInnerData())) { - const bool is_true = ExprIsTrue(ast_return->condition); - if (!is_true) { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition)); - AddLine("IF NE.x;"); - ResetTemporaries(); - } - if (ast_return->kills) { - AddLine("KIL TR;"); - } else { - 
Exit(); - } - if (!is_true) { - AddLine("ENDIF;"); - } - } else if (const auto ast_break = std::get_if(&*node->GetInnerData())) { - if (ExprIsTrue(ast_break->condition)) { - AddLine("BRK;"); - } else { - AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition)); - AddLine("BRK (NE.x);"); - ResetTemporaries(); - } - } else if (std::holds_alternative(*node->GetInnerData())) { - // Nothing to do - } else { - UNREACHABLE(); - } -} - -std::string ARBDecompiler::VisitExpression(const Expr& node) { - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), - VisitExpression(expr->operand2)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); - return result; - } - if (const auto expr = std::get_if(&*node)) { - return fmt::format("P{}.x", static_cast(expr->predicate)); - } - if (const auto expr = std::get_if(&*node)) { - return Visit(ir.GetConditionCode(expr->cc)); - } - if (const auto expr = std::get_if(&*node)) { - return fmt::format("F{}.x", expr->var_index); - } - if (const auto expr = std::get_if(&*node)) { - return expr->value ? 
"0xffffffff" : "0"; - } - if (const auto expr = std::get_if(&*node)) { - std::string result = AllocTemporary(); - AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); - return result; - } - UNREACHABLE(); - return "0"; -} - -void ARBDecompiler::VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node); - } -} - -std::string ARBDecompiler::Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - const std::size_t index = static_cast(operation->GetCode()); - if (index >= OPERATION_DECOMPILERS.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", index); - return {}; - } - const auto decompiler = OPERATION_DECOMPILERS[index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return "{0, 0, 0, 0}.x"; - } - return fmt::format("R{}.x", index); - } - - if (const auto cv = std::get_if(&*node)) { - return fmt::format("CV{}.x", cv->GetIndex()); - } - - if (const auto immediate = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); - return temporary; - } - - if (const auto predicate = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - AddLine("MOV.S {}, -1;", temporary); - break; - case Tegra::Shader::Pred::NeverExecute: - AddLine("MOV.S {}, 0;", temporary); - break; - default: - AddLine("MOV.S {}, P{}.x;", temporary, static_cast(index)); - break; - } - if (predicate->IsNegated()) { - AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); - } - return temporary; - } - - if (const auto abuf = 
std::get_if(&*node)) { - if (abuf->IsPhysicalBuffer()) { - UNIMPLEMENTED_MSG("Physical buffers are not implemented"); - return "{0, 0, 0, 0}.x"; - } - - const Attribute::Index index = abuf->GetIndex(); - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (index) { - case Attribute::Index::Position: { - if (stage == ShaderType::Geometry) { - return fmt::format("{}_position[{}].{}", StageInputName(stage), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.position.{}", StageInputName(stage), swizzle); - } - } - case Attribute::Index::TessCoordInstanceIDVertexID: - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - return "vertex.instance"; - case 3: - return "vertex.id"; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - break; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return "fragment.pointcoord.x"; - case 1: - return "fragment.pointcoord.y"; - } - UNIMPLEMENTED(); - break; - case Attribute::Index::FrontFacing: { - ASSERT(stage == ShaderType::Fragment); - ASSERT(element == 3); - const std::string temporary = AllocVectorTemporary(); - AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); - AddLine("MOV.U.CC RC.x, -RC;"); - AddLine("MOV.S {}.x, 0;", temporary); - AddLine("MOV.S {}.x (NE.x), -1;", temporary); - return fmt::format("{}.x", temporary); - } - default: - if (IsGenericAttribute(index)) { - if (stage == ShaderType::Geometry) { - return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), - Visit(abuf->GetBuffer()), swizzle); - } else { - return fmt::format("{}.attrib[{}].{}", StageInputName(stage), - GetGenericAttributeIndex(index), swizzle); - } - } - UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index); - break; - } - return "{0, 0, 0, 0}.x"; - } - - if (const auto cbuf = std::get_if(&*node)) { - std::string offset_string; - const auto& offset = cbuf->GetOffset(); - if (const auto imm = 
std::get_if(&*offset)) { - offset_string = std::to_string(imm->GetValue()); - } else { - offset_string = Visit(offset); - } - std::string temporary = AllocTemporary(); - AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); - return temporary; - } - - if (const auto gmem = std::get_if(&*node)) { - std::string temporary = AllocTemporary(); - AddLine("MOV {}, 0;", temporary); - AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); - return temporary; - } - - if (const auto lmem = std::get_if(&*node)) { - std::string temporary = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", temporary, temporary); - AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); - return temporary; - } - - if (const auto smem = std::get_if(&*node)) { - std::string temporary = Visit(smem->GetAddress()); - AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary); - return temporary; - } - - if (const auto internal_flag = std::get_if(&*node)) { - const std::size_t index = static_cast(internal_flag->GetFlag()); - return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)); - } - AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); - AddLine("IF NE.x;"); - VisitBlock(conditional->GetCode()); - AddLine("ENDIF;"); - return {}; - } - - if ([[maybe_unused]] const auto cmt = std::get_if(&*node)) { - // Uncommenting this will generate invalid code. GLASM lacks comments. 
- // AddLine("// {}", cmt->GetText()); - return {}; - } - - UNIMPLEMENTED(); - return {}; -} - -std::tuple ARBDecompiler::BuildCoords(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - UNIMPLEMENTED_IF(meta.sampler.is_indexed); - - const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array && - meta.sampler.type == Tegra::Shader::TextureType::TextureCube; - const std::size_t count = operation.GetOperandsCount(); - std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - if (meta.sampler.is_array) { - AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array)); - ++i; - } - if (meta.sampler.is_shadow) { - std::string compare = Visit(meta.depth_compare); - if (is_extended) { - ASSERT(i == 4); - std::string extra_coord = AllocVectorTemporary(); - AddLine("MOV.F {}.x, {};", extra_coord, compare); - return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0}; - } - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare); - ++i; - } - return {temporary, temporary, i}; -} - -std::string ARBDecompiler::BuildAoffi(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - if (meta.aoffi.empty()) { - return {}; - } - const std::string temporary = AllocVectorTemporary(); - std::size_t i = 0; - for (auto& node : meta.aoffi) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); - } - return fmt::format(", offset({})", temporary); -} - -std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { - // Read a bindless SSBO, return its address and set CC accordingly - // address = c[binding].xy - // length = c[binding].z - const u32 binding = global_memory_names.at(gmem.GetDescriptor()); - - const std::string pointer = AllocLongVectorTemporary(); - std::string temporary = AllocTemporary(); - - AddLine("PK64.U {}, c[{}];", pointer, binding); - 
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), - Visit(gmem.GetBaseAddress())); - AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); - AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); - // Compare offset to length and set CC - AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); - return fmt::format("{}.x", pointer); -} - -void ARBDecompiler::Exit() { - if (stage != ShaderType::Fragment) { - AddLine("RET;"); - return; - } - - const auto safe_get_register = [this](u32 reg) -> std::string { - if (ir.GetRegisters().contains(reg)) { - return fmt::format("R{}.x", reg); - } - return "{0, 0, 0, 0}.x"; - }; - - const auto& header = ir.GetHeader(); - u32 current_reg = 0; - for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { - for (u32 component = 0; component < 4; ++component) { - if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { - continue; - } - AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), - safe_get_register(current_reg)); - ++current_reg; - } - } - if (header.ps.omap.depth) { - AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); - } - - AddLine("RET;"); -} - -std::string ARBDecompiler::Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string dest_name; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op - return {}; - } - dest_name = fmt::format("R{}.x", gpr->GetIndex()); - } else if (const auto abuf = std::get_if(&*dest)) { - const u32 element = abuf->GetElement(); - const char swizzle = Swizzle(element); - switch (const Attribute::Index index = abuf->GetIndex()) { - case Attribute::Index::Position: - dest_name = fmt::format("result.position.{}", swizzle); - break; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return {}; - case 1: - case 2: 
- if (!device.HasNvViewportArray2()) { - LOG_ERROR( - Render_OpenGL, - "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); - return {}; - } - dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; - break; - case 3: - dest_name = "result.pointsize.x"; - break; - } - break; - case Attribute::Index::ClipDistances0123: - dest_name = fmt::format("result.clip[{}].x", element); - break; - case Attribute::Index::ClipDistances4567: - dest_name = fmt::format("result.clip[{}].x", element + 4); - break; - default: - if (!IsGenericAttribute(index)) { - UNREACHABLE(); - return {}; - } - dest_name = - fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); - break; - } - } else if (const auto lmem = std::get_if(&*dest)) { - const std::string address = Visit(lmem->GetAddress()); - AddLine("SHR.U {}, {}, 2;", address, address); - dest_name = fmt::format("lmem[{}].x", address); - } else if (const auto smem = std::get_if(&*dest)) { - AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); - ResetTemporaries(); - return {}; - } else if (const auto gmem = std::get_if(&*dest)) { - AddLine("IF NE.x;"); - AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); - AddLine("ENDIF;"); - ResetTemporaries(); - return {}; - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", dest_name, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::Select(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), - Visit(operation[2])); - return temporary; -} - -std::string ARBDecompiler::FClamp(Operation operation) { - // 1.0f in hex, replace with std::bit_cast on C++20 - static constexpr u32 POSITIVE_ONE = 0x3f800000; - - std::string temporary = AllocTemporary(); - const Node& value = operation[0]; - const Node& low = operation[1]; - const Node& high 
= operation[2]; - const auto* const imm_low = std::get_if(&*low); - const auto* const imm_high = std::get_if(&*high); - if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { - AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); - } else { - AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); - AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); - } - return temporary; -} - -std::string ARBDecompiler::FCastHalf0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FCastHalf1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); - AddLine("MOV {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::FSqrt(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); - AddLine("RCP.F32 {}, {};", temporary, temporary); - return temporary; -} - -std::string ARBDecompiler::FSwizzleAdd(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. 
Kepler or better is required."); - AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); - return fmt::format("{}.x", temporary); - } - - AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); - AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); - AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); - AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); - AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); - AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); - AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HAdd2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HMul2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HFma2(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - const std::string tmp3 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); - AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); - AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, 
tmp3); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HAbsolute(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HNegate(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("MOVC.S RC.x, {};", Visit(operation[1])); - AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); - AddLine("MOVC.S RC.x, {};", Visit(operation[2])); - AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HClamp(Operation operation) { - const std::string tmp1 = AllocVectorTemporary(); - const std::string tmp2 = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); - AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); - AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); - AddLine("PK2H.F {}.x, {};", tmp1, tmp1); - return fmt::format("{}.x", tmp1); -} - -std::string ARBDecompiler::HCastFloat(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); - AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HUnpack(Operation operation) { - std::string operand = Visit(operation[0]); - switch (std::get(operation.GetMeta())) { - case 
Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H0_H0: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.y, {}.x;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - case Tegra::Shader::HalfType::H1_H1: { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, operand); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); - } - } - UNREACHABLE(); - return "{0, 0, 0, 0}.x"; -} - -std::string ARBDecompiler::HMergeF32(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH0(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.x, {}.z;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HMergeH1(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); - AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); - AddLine("MOV.U {}.y, {}.w;", temporary, temporary); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return 
fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::HPack2(Operation operation) { - const std::string temporary = AllocVectorTemporary(); - AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); - AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); - AddLine("PK2H.F {}.x, {};", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const Tegra::Shader::Pred index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = fmt::format("P{}.x", static_cast(index)); - } else if (const auto internal_flag = std::get_if(&*dest)) { - const std::size_t index = static_cast(internal_flag->GetFlag()); - target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); - } else { - UNREACHABLE(); - ResetTemporaries(); - return {}; - } - - AddLine("MOV.U {}, {};", target, Visit(src)); - ResetTemporaries(); - return {}; -} - -std::string ARBDecompiler::LogicalPick2(Operation operation) { - std::string temporary = AllocTemporary(); - const u32 index = std::get(*operation[1]).GetValue(); - AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); - return temporary; -} - -std::string ARBDecompiler::LogicalAnd2(Operation operation) { - std::string temporary = AllocTemporary(); - const std::string op = Visit(operation[0]); - AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); - return temporary; -} - -std::string ARBDecompiler::FloatOrdered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S 
{}, -1;", temporary); - AddLine("MOV.S {} (NAN.x), 0;", temporary); - AddLine("MOV.S {} (NAN.y), 0;", temporary); - return temporary; -} - -std::string ARBDecompiler::FloatUnordered(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); - AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("MOV.S {} (NAN.x), -1;", temporary); - AddLine("MOV.S {} (NAN.y), -1;", temporary); - return temporary; -} - -std::string ARBDecompiler::LogicalAddCarry(Operation operation) { - std::string temporary = AllocTemporary(); - AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); - AddLine("MOV.S {}, 0;", temporary); - AddLine("IF CF.x;"); - AddLine("MOV.S {}, -1;", temporary); - AddLine("ENDIF;"); - return temporary; -} - -std::string ARBDecompiler::Texture(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string_view opcode = "TEX"; - std::string extra; - if (meta.bias) { - ASSERT(!meta.lod); - opcode = "TXB"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); - } else { - const std::string bias = AllocTemporary(); - AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); - extra = fmt::format(" {},", bias); - } - } - if (meta.lod) { - ASSERT(!meta.bias); - opcode = "TXL"; - - if (swizzle < 4) { - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } else { - const std::string lod = AllocTemporary(); - AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); - extra = fmt::format(" {},", lod); - } - } - - AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", 
temporary); -} - -std::string ARBDecompiler::TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - std::string comp; - if (!meta.sampler.is_shadow) { - const auto& immediate = std::get(*meta.component); - comp = fmt::format(".{}", Swizzle(immediate.GetValue())); - } - - AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::string lod = operation.GetOperandsCount() > 0 ? 
Visit(operation[0]) : "0"; - AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureQueryLod(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const std::string temporary = AllocVectorTemporary(); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - - ASSERT(!meta.sampler.is_array); - - const std::size_t count = operation.GetOperandsCount(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); - AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); - AddLine("TRUNC.S {}, {};", temporary, temporary); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TexelFetch(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const auto [coords, temporary, swizzle] = BuildCoords(operation); - - if (!meta.sampler.is_buffer) { - ASSERT(swizzle < 4); - AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); - } - AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta), - BuildAoffi(operation)); - AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; - const std::string ddx = AllocVectorTemporary(); - const std::string ddy = 
AllocVectorTemporary(); - const std::string coord = std::get<1>(BuildCoords(operation)); - - const std::size_t num_components = meta.derivates.size() / 2; - for (std::size_t index = 0; index < num_components; ++index) { - const char swizzle = Swizzle(index); - AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); - AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); - } - - const std::string_view result = coord; - AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, - TextureType(meta), BuildAoffi(operation)); - AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); - return fmt::format("{}.x", result); -} - -std::string ARBDecompiler::ImageLoad(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t count = operation.GetOperandsCount(); - const std::string_view type = ImageType(meta.image.type); - - const std::string temporary = AllocVectorTemporary(); - for (std::size_t i = 0; i < count; ++i) { - AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); - } - AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); - AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::ImageStore(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; - const std::size_t num_coords = operation.GetOperandsCount(); - const std::size_t num_values = meta.values.size(); - const std::string_view type = ImageType(meta.image.type); - - const std::string coord = AllocVectorTemporary(); - const std::string value = AllocVectorTemporary(); - for (std::size_t i = 0; i < num_coords; ++i) { - AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), 
Visit(operation[i])); - } - for (std::size_t i = 0; i < num_values; ++i) { - AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); - } - AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); - return {}; -} - -std::string ARBDecompiler::Branch(Operation operation) { - const auto target = std::get(*operation[0]); - AddLine("MOV.U PC.x, {};", target.GetValue()); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::BranchIndirect(Operation operation) { - AddLine("MOV.U PC.x, {};", Visit(operation[0])); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::PushFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const u32 target = std::get(*operation[0]).GetValue(); - const std::string_view stack_name = StackName(stack); - AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); - AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - return {}; -} - -std::string ARBDecompiler::PopFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const std::string_view stack_name = StackName(stack); - AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); - AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); - AddLine("CONT;"); - return {}; -} - -std::string ARBDecompiler::Exit(Operation) { - Exit(); - return {}; -} - -std::string ARBDecompiler::Discard(Operation) { - AddLine("KIL TR;"); - return {}; -} - -std::string ARBDecompiler::EmitVertex(Operation) { - AddLine("EMIT;"); - return {}; -} - -std::string ARBDecompiler::EndPrimitive(Operation) { - AddLine("ENDPRIM;"); - return {}; -} - -std::string ARBDecompiler::InvocationId(Operation) { - return "primitive.invocation"; -} - -std::string ARBDecompiler::YNegate(Operation) { - LOG_WARNING(Render_OpenGL, "(STUBBED)"); - std::string temporary = AllocTemporary(); - AddLine("MOV.F {}, 1;", temporary); - return temporary; -} - -std::string 
ARBDecompiler::ThreadId(Operation) { - return fmt::format("{}.threadid", StageInputName(stage)); -} - -std::string ARBDecompiler::ShuffleIndexed(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "NV_shader_thread_shuffle is missing. Kepler or better is required."); - return Visit(operation[0]); - } - const std::string temporary = AllocVectorTemporary(); - AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), - Visit(operation[1])); - AddLine("MOV.U {}.x, {}.y;", temporary, temporary); - return fmt::format("{}.x", temporary); -} - -std::string ARBDecompiler::Barrier(Operation) { - AddLine("BAR;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGroup(Operation) { - AddLine("MEMBAR.CTA;"); - return {}; -} - -std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { - AddLine("MEMBAR;"); - return {}; -} - -} // Anonymous namespace - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier) { - return ARBDecompiler(device, ir, registry, stage, identifier).Code(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h deleted file mode 100644 index 6afc87220..000000000 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2020 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "common/common_types.h" - -namespace Tegra::Engines { -enum class ShaderType : u32; -} - -namespace VideoCommon::Shader { -class ShaderIR; -class Registry; -} // namespace VideoCommon::Shader - -namespace OpenGL { - -class Device; - -std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ceb3abcb2..3551dbdcc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -54,40 +54,6 @@ namespace { constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? 
image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - -template -TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, - ShaderType shader_type, size_t index = 0) { - if constexpr (std::is_same_v) { - if (entry.is_separated) { - const u32 buffer_1 = entry.buffer; - const u32 buffer_2 = entry.secondary_buffer; - const u32 offset_1 = entry.offset; - const u32 offset_2 = entry.secondary_offset; - const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); - const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); - return TextureHandle(handle_1 | handle_2, via_header_index); - } - } - if (entry.is_bindless) { - const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); - return TextureHandle(raw, via_header_index); - } - const u32 buffer = engine.GetBoundBuffer(); - const u64 offset = (entry.offset + index) * sizeof(u32); - return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); -} - /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -119,44 +85,6 @@ std::pair TransformFeedbackEnum(u8 location) { void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } - -ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { - if (entry.is_buffer) { - return ImageViewType::Buffer; - } - switch (entry.type) { - case Tegra::Shader::TextureType::Texture1D: - return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; - case Tegra::Shader::TextureType::Texture2D: - return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; - case Tegra::Shader::TextureType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::TextureType::TextureCube: - return entry.is_array ? 
ImageViewType::CubeArray : ImageViewType::Cube; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - -ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { - switch (entry.type) { - case Tegra::Shader::ImageType::Texture1D: - return ImageViewType::e1D; - case Tegra::Shader::ImageType::Texture1DArray: - return ImageViewType::e1DArray; - case Tegra::Shader::ImageType::Texture2D: - return ImageViewType::e2D; - case Tegra::Shader::ImageType::Texture2DArray: - return ImageViewType::e2DArray; - case Tegra::Shader::ImageType::Texture3D: - return ImageViewType::e3D; - case Tegra::Shader::ImageType::TextureBuffer: - return ImageViewType::Buffer; - } - UNREACHABLE(); - return ImageViewType::e2D; -} - } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), - fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), - async_shaders(emu_window_) { - if (device.UseAsynchronousShaders()) { - async_shaders.AllocateWorkers(); - } -} + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} RasterizerOpenGL::~RasterizerOpenGL() = default; @@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() { } } -void RasterizerOpenGL::SetupShaders(bool is_indexed) { - u32 clip_distances = 0; - - std::array shaders{}; - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeGraphicsDescriptors(); - - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = maxwell3d.regs.shader_config[index]; - const auto program{static_cast(index)}; - - // Skip stages that are 
not enabled - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - switch (program) { - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(0); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(0); - break; - default: - break; - } - continue; - } - // Currently this stages are not supported in the OpenGL backend. - // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL - if (program == Maxwell::ShaderProgram::TesselationControl || - program == Maxwell::ShaderProgram::TesselationEval) { - continue; - } - - Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); - const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; - switch (program) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - program_manager.UseVertexShader(program_handle); - break; - case Maxwell::ShaderProgram::Geometry: - program_manager.UseGeometryShader(program_handle); - break; - case Maxwell::ShaderProgram::Fragment: - program_manager.UseFragmentShader(program_handle); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, - shader_config.enable.Value(), shader_config.offset); - break; - } - - // Stage indices are 0 - 5 - const size_t stage = index == 0 ? 0 : index - 1; - shaders[stage] = shader; - - SetupDrawTextures(shader, stage); - SetupDrawImages(shader, stage); - - buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); - - buffer_cache.UnbindGraphicsStorageBuffers(stage); - u32 ssbo_index = 0; - for (const auto& buffer : shader->GetEntries().global_memory_entries) { - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, - buffer.cbuf_offset, buffer.is_written); - ++ssbo_index; - } - - // Workaround for Intel drivers. 
- // When a clip distance is enabled but not set in the shader it crops parts of the screen - // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the - // clip distances only when it's written by a shader stage. - clip_distances |= shader->GetEntries().clip_distances; - - // When VertexA is enabled, we have dual vertex shaders - if (program == Maxwell::ShaderProgram::VertexA) { - // VertexB was combined with VertexA, so we skip the VertexB iteration - ++index; - } - } - SyncClipEnabled(clip_distances); - maxwell3d.dirty.flags[Dirty::Shaders] = false; - - buffer_cache.UpdateGraphicsBuffers(is_indexed); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - buffer_cache.BindHostGeometryBuffers(is_indexed); - - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - const Shader* const shader = shaders[stage]; - if (!shader) { - continue; - } - buffer_cache.BindHostStageBuffers(stage); - const auto& base = device.GetBaseBindings(stage); - BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, - texture_index, image_index); - } -} - void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - shader_cache.LoadDiskCache(title_id, stop_loading, callback); -} + const VideoCore::DiskResourceLoadCallback& callback) {} void RasterizerOpenGL::Clear() { MICROPROFILE_SCOPE(OpenGL_Clears); @@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Setup shaders and their used resources. 
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - SetupShaders(is_indexed); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); @@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { gpu.TickWork(); } -void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { - Shader* const kernel = shader_cache.GetComputeKernel(code_addr); - - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - BindComputeTextures(kernel); - - const auto& entries = kernel->GetEntries(); - buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); - buffer_cache.UnbindComputeStorageBuffers(); - u32 ssbo_index = 0; - for (const auto& buffer : entries.global_memory_entries) { - buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, - buffer.is_written); - ++ssbo_index; - } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); - - const auto& launch_desc = kepler_compute.launch_description; - glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); - ++num_queued_commands; +void RasterizerOpenGL::DispatchCompute() { + UNREACHABLE_MSG("Not implemented"); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::BindComputeTextures(Shader* kernel) { - image_view_indices.clear(); - sampler_handles.clear(); - - texture_cache.SynchronizeComputeDescriptors(); - - SetupComputeTextures(kernel); - SetupComputeImages(kernel); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - program_manager.BindCompute(kernel->GetHandle()); - size_t image_view_index = 0; - size_t texture_index = 0; - size_t image_index = 0; - 
BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index); -} - -void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture, - GLuint base_image, size_t& image_view_index, - size_t& texture_index, size_t& image_index) { - const GLuint* const samplers = sampler_handles.data() + texture_index; - const GLuint* const textures = texture_handles.data() + texture_index; - const GLuint* const images = image_handles.data() + image_index; - - const size_t num_samplers = entries.samplers.size(); - for (const auto& sampler : entries.samplers) { - for (size_t i = 0; i < sampler.size; ++i) { - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler)); - texture_handles[texture_index++] = handle; - } - } - const size_t num_images = entries.images.size(); - for (size_t unit = 0; unit < num_images; ++unit) { - // TODO: Mark as modified - const ImageViewId image_view_id = image_view_ids[image_view_index++]; - const ImageView& image_view = texture_cache.GetImageView(image_view_id); - const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit])); - image_handles[image_index] = handle; - ++image_index; - } - if (num_samplers > 0) { - glBindSamplers(base_texture, static_cast(num_samplers), samplers); - glBindTextures(base_texture, static_cast(num_samplers), textures); - } - if (num_images > 0) { - glBindImageTextures(base_image, static_cast(num_images), images); - } -} - -void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().samplers) { - const auto shader_type = static_cast(stage_index); - for (size_t index = 0; index < entry.size; ++index) { - const auto handle = - 
GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index); - const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : kernel->GetEntries().samplers) { - for (size_t i = 0; i < entry.size; ++i) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i); - const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); - sampler_handles.push_back(sampler->Handle()); - image_view_indices.push_back(handle.image); - } - } -} - -void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) { - const bool via_header_index = - maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; - for (const auto& entry : shader->GetEntries().images) { - const auto shader_type = static_cast(stage_index); - const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type); - image_view_indices.push_back(handle.image); - } -} - -void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { - const bool via_header_index = kepler_compute.launch_description.linked_tsc; - for (const auto& entry : shader->GetEntries().images) { - const auto handle = - GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute); - image_view_indices.push_back(handle.image); - } -} - void RasterizerOpenGL::SyncState() { SyncViewport(); SyncRasterizeEnable(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d30ad698f..1f58f8791 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -28,11 +28,9 @@ #include 
"video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core::Memory { @@ -81,7 +79,7 @@ public: void Draw(bool is_indexed, bool is_instanced) override; void Clear() override; - void DispatchCompute(GPUVAddr code_addr) override; + void DispatchCompute() override; void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional timestamp) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; @@ -118,36 +116,11 @@ public: return num_queued_commands > 0; } - VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { - return async_shaders; - } - - const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { - return async_shaders; - } - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES; - void BindComputeTextures(Shader* kernel); - - void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, - size_t& image_view_index, size_t& texture_index, size_t& image_index); - - /// Configures the current textures to use for the draw command. - void SetupDrawTextures(const Shader* shader, size_t stage_index); - - /// Configures the textures used in a compute shader. - void SetupComputeTextures(const Shader* kernel); - - /// Configures images in a graphics shader. - void SetupDrawImages(const Shader* shader, size_t stage_index); - - /// Configures images in a compute shader. 
- void SetupComputeImages(const Shader* shader); - /// Syncs state to match guest's void SyncState(); @@ -230,8 +203,6 @@ private: /// End a transform feedback void EndTransformFeedback(); - void SetupShaders(bool is_indexed); - Tegra::GPU& gpu; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; @@ -251,8 +222,6 @@ private: AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; - VideoCommon::Shader::AsyncShaders async_shaders; - boost::container::static_vector image_view_indices; std::array image_view_ids; boost::container::static_vector sampler_handles; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5a01c59ec..4dd166156 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -20,307 +20,19 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/shader/memory_util.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" #include "video_core/shader_notify.h" namespace OpenGL { -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::GetShaderAddress; -using VideoCommon::Shader::GetShaderCode; -using VideoCommon::Shader::GetUniqueIdentifier; -using VideoCommon::Shader::KERNEL_MAIN_OFFSET; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::Registry; -using VideoCommon::Shader::ShaderIR; -using 
VideoCommon::Shader::STAGE_MAIN_OFFSET; - -namespace { - -constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; - -/// Gets the shader type from a Maxwell program type -constexpr GLenum GetGLShaderType(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_SHADER; - case ShaderType::Geometry: - return GL_GEOMETRY_SHADER; - case ShaderType::Fragment: - return GL_FRAGMENT_SHADER; - case ShaderType::Compute: - return GL_COMPUTE_SHADER; - default: - return GL_NONE; - } -} - -constexpr const char* GetShaderTypeName(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return "VS"; - case ShaderType::TesselationControl: - return "HS"; - case ShaderType::TesselationEval: - return "DS"; - case ShaderType::Geometry: - return "GS"; - case ShaderType::Fragment: - return "FS"; - case ShaderType::Compute: - return "CS"; - } - return "UNK"; -} - -constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { - switch (program_type) { - case Maxwell::ShaderProgram::VertexA: - case Maxwell::ShaderProgram::VertexB: - return ShaderType::Vertex; - case Maxwell::ShaderProgram::TesselationControl: - return ShaderType::TesselationControl; - case Maxwell::ShaderProgram::TesselationEval: - return ShaderType::TesselationEval; - case Maxwell::ShaderProgram::Geometry: - return ShaderType::Geometry; - case Maxwell::ShaderProgram::Fragment: - return ShaderType::Fragment; - } - return {}; -} - -constexpr GLenum AssemblyEnum(ShaderType shader_type) { - switch (shader_type) { - case ShaderType::Vertex: - return GL_VERTEX_PROGRAM_NV; - case ShaderType::TesselationControl: - return GL_TESS_CONTROL_PROGRAM_NV; - case ShaderType::TesselationEval: - return GL_TESS_EVALUATION_PROGRAM_NV; - case ShaderType::Geometry: - return GL_GEOMETRY_PROGRAM_NV; - case ShaderType::Fragment: - return GL_FRAGMENT_PROGRAM_NV; - case ShaderType::Compute: - return GL_COMPUTE_PROGRAM_NV; - } - return {}; -} - -std::string 
MakeShaderID(u64 unique_identifier, ShaderType shader_type) { - return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); -} - -std::shared_ptr MakeRegistry(const ShaderDiskCacheEntry& entry) { - const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; - const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, - entry.graphics_info, entry.compute_info}; - auto registry = std::make_shared(entry.type, info); - for (const auto& [address, value] : entry.keys) { - const auto [buffer, offset] = address; - registry->InsertKey(buffer, offset, value); - } - for (const auto& [offset, sampler] : entry.bound_samplers) { - registry->InsertBoundSampler(offset, sampler); - } - for (const auto& [key, sampler] : entry.bindless_samplers) { - const auto [buffer, offset] = key; - registry->InsertBindlessSampler(buffer, offset, sampler); - } - return registry; -} - -std::unordered_set GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - - std::vector formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - - std::unordered_set supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast(format)); - } - return supported_formats; -} - -} // Anonymous namespace - -ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { - if (device.UseDriverCache()) { - // Ignore hint retrievable if we are using the driver cache - hint_retrievable = false; - } - const std::string shader_id = MakeShaderID(unique_identifier, shader_type); - LOG_INFO(Render_OpenGL, "{}", shader_id); - - auto program = std::make_shared(); - - if (device.UseAssemblyShaders()) { - const std::string arb = - DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); - - GLuint& arb_prog = 
program->assembly_program.handle; - -// Commented out functions signal OpenGL errors but are compatible with apitrace. -// Use them only to capture and replay on apitrace. -#if 0 - glGenProgramsNV(1, &arb_prog); - glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast(arb.size()), - reinterpret_cast(arb.data())); -#else - glGenProgramsARB(1, &arb_prog); - glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast(arb.size()), arb.data()); -#endif - const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - LOG_CRITICAL(Render_OpenGL, "{}", err); - LOG_INFO(Render_OpenGL, "\n{}", arb); - } - } else { - const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); - OGLShader shader; - shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); - - program->source_program.Create(true, hint_retrievable, shader.handle); - } - - return program; -} - -Shader::Shader(std::shared_ptr registry_, ShaderEntries entries_, - ProgramSharedPtr program_, bool is_built_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, - is_built{is_built_} { - handle = program->assembly_program.handle; - if (handle == 0) { - handle = program->source_program.handle; - } - if (is_built) { - ASSERT(handle != 0); - } -} +Shader::Shader() = default; Shader::~Shader() = default; -GLuint Shader::GetHandle() const { - DEBUG_ASSERT(registry->IsConsistent()); - return handle; -} - -bool Shader::IsBuilt() const { - return is_built; -} - -void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { - program->source_program = std::move(new_program); - handle = program->source_program.handle; - is_built = true; -} - -void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { - program->assembly_program = std::move(new_program); - handle = program->assembly_program.handle; - is_built = true; -} - -std::unique_ptr Shader::CreateStageFromMemory( - 
const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, - ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { - const auto shader_type = GetShaderType(program_type); - - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared(shader_type, gpu.Maxwell3D()); - if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = - BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr(new Shader(std::move(registry), - MakeEntries(params.device, ir, shader_type), - std::move(program), true)); - } else { - // Required for entries - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - auto entries = MakeEntries(params.device, ir, shader_type); - - async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, - std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, - COMPILER_SETTINGS, *registry, cpu_addr); - - auto program = std::make_shared(); - return std::unique_ptr( - new Shader(std::move(registry), std::move(entries), std::move(program), false)); - } -} - -std::unique_ptr 
Shader::CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code) { - auto& gpu = params.gpu; - gpu.ShaderNotify().MarkSharderBuilding(); - - auto registry = std::make_shared(ShaderType::Compute, params.engine); - const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - const u64 uid = params.unique_identifier; - auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); - - ShaderDiskCacheEntry entry; - entry.type = ShaderType::Compute; - entry.code = std::move(code); - entry.unique_identifier = uid; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.compute_info = registry->GetComputeInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); - - gpu.ShaderNotify().MarkShaderComplete(); - - return std::unique_ptr(new Shader(std::move(registry), - MakeEntries(params.device, ir, ShaderType::Compute), - std::move(program))); -} - -std::unique_ptr Shader::CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader) { - return std::unique_ptr(new Shader( - precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); -} - ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; -void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - disk_cache.BindTitleID(title_id); - const std::optional transferable = disk_cache.LoadTransferable(); - - LOG_INFO(Render_OpenGL, "Total Shader Count: {}", - transferable.has_value() ? 
transferable->size() : 0); - - if (!transferable) { - return; - } - - std::vector gl_cache; - if (!device.UseAssemblyShaders() && !device.UseDriverCache()) { - // Only load precompiled cache when we are not using assembly shaders - gl_cache = disk_cache.LoadPrecompiled(); - } - const auto supported_formats = GetSupportedFormats(); - - // Track if precompiled cache was altered during loading to know if we have to - // serialize the virtual precompiled cache file back to the hard drive - bool precompiled_cache_altered = false; - - // Inform the frontend about shader build initialization - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); - } - - std::mutex mutex; - std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex - std::atomic_bool gl_cache_failed = false; - - const auto find_precompiled = [&gl_cache](u64 id) { - return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier); - }; - - const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, - std::size_t end) { - const auto scope = context->Acquire(); - - for (std::size_t i = begin; i < end; ++i) { - if (stop_loading.stop_requested()) { - return; - } - const auto& entry = (*transferable)[i]; - const u64 uid = entry.unique_identifier; - const auto it = find_precompiled(uid); - const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; - - const bool is_compute = entry.type == ShaderType::Compute; - const u32 main_offset = is_compute ? 
KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - auto registry = MakeRegistry(entry); - const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); - - ProgramSharedPtr program; - if (precompiled_entry) { - // If the shader is precompiled, attempt to load it with - program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); - if (!program) { - gl_cache_failed = true; - } - } - if (!program) { - // Otherwise compile it from GLSL - program = BuildShader(device, entry.type, uid, ir, *registry, true); - } - - PrecompiledShader shader; - shader.program = std::move(program); - shader.registry = std::move(registry); - shader.entries = MakeEntries(device, ir, entry.type); - - std::scoped_lock lock{mutex}; - if (callback) { - callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, - transferable->size()); - } - runtime_cache.emplace(entry.unique_identifier, std::move(shader)); - } - }; - - const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())}; - const std::size_t bucket_size{transferable->size() / num_workers}; - std::vector> contexts(num_workers); - std::vector threads(num_workers); - for (std::size_t i = 0; i < num_workers; ++i) { - const bool is_last_worker = i + 1 == num_workers; - const std::size_t start{bucket_size * i}; - const std::size_t end{is_last_worker ? 
transferable->size() : start + bucket_size}; - - // On some platforms the shared context has to be created from the GUI thread - contexts[i] = emu_window.CreateSharedContext(); - threads[i] = std::thread(worker, contexts[i].get(), start, end); - } - for (auto& thread : threads) { - thread.join(); - } - - if (gl_cache_failed) { - // Invalidate the precompiled cache if a shader dumped shader was rejected - disk_cache.InvalidatePrecompiled(); - precompiled_cache_altered = true; - return; - } - if (stop_loading.stop_requested()) { - return; - } - - if (device.UseAssemblyShaders() || device.UseDriverCache()) { - // Don't store precompiled binaries for assembly shaders or when using the driver cache - return; - } - - // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw - // before precompiling them - - for (std::size_t i = 0; i < transferable->size(); ++i) { - const u64 id = (*transferable)[i].unique_identifier; - const auto it = find_precompiled(id); - if (it == gl_cache.end()) { - const GLuint program = runtime_cache.at(id).program->source_program.handle; - disk_cache.SavePrecompiled(id, program); - precompiled_cache_altered = true; - } - } - - if (precompiled_cache_altered) { - disk_cache.SaveVirtualPrecompiledFile(); - } -} - -ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set& supported_formats) { - if (!supported_formats.contains(precompiled_entry.binary_format)) { - LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); - return {}; - } - - auto program = std::make_shared(); - GLuint& handle = program->source_program.handle; - handle = glCreateProgram(); - glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); - glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), - static_cast(precompiled_entry.binary.size())); - - GLint link_status; - 
glGetProgramiv(handle, GL_LINK_STATUS, &link_status); - if (link_status == GL_FALSE) { - LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); - return {}; - } - - return program; -} - -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders) { - if (!maxwell3d.dirty.flags[Dirty::Shaders]) { - auto* last_shader = last_shaders[static_cast(program)]; - if (last_shader->IsBuilt()) { - return last_shader; - } - } - - const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; - - if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { - auto completed_work = async_shaders.GetCompletedWork(); - for (auto& work : completed_work) { - Shader* shader = TryGet(work.cpu_address); - gpu.ShaderNotify().MarkShaderComplete(); - if (shader == nullptr) { - continue; - } - using namespace VideoCommon::Shader; - if (work.backend == AsyncShaders::Backend::OpenGL) { - shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); - } else if (work.backend == AsyncShaders::Backend::GLASM) { - shader->AsyncGLASMBuilt(std::move(work.program.glasm)); - } - - auto& registry = shader->GetRegistry(); - - ShaderDiskCacheEntry entry; - entry.type = work.shader_type; - entry.code = std::move(work.code); - entry.code_b = std::move(work.code_b); - entry.unique_identifier = work.uid; - entry.bound_buffer = registry.GetBoundBuffer(); - entry.graphics_info = registry.GetGraphicsInfo(); - entry.keys = registry.GetKeys(); - entry.bound_samplers = registry.GetBoundSamplers(); - entry.bindless_samplers = registry.GetBindlessSamplers(); - disk_cache.SaveEntry(std::move(entry)); - } - } - - // Look up shader in the cache based on address - const std::optional cpu_addr{gpu_memory.GpuToCpuAddress(address)}; - if (Shader* const shader{cpu_addr ? 
TryGet(*cpu_addr) : null_shader.get()}) { - return last_shaders[static_cast(program)] = shader; - } - - const u8* const host_ptr{gpu_memory.GetPointer(address)}; - - // No shader found - create a new one - ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; - ProgramCode code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; - const u8* host_ptr_b = gpu_memory.GetPointer(address_b); - code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); - } - const std::size_t code_size = code.size() * sizeof(u64); - - const u64 unique_identifier = GetUniqueIdentifier( - GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - - const ShaderParameters params{gpu, maxwell3d, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr shader; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), - async_shaders, cpu_addr.value_or(0)); - } else { - shader = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = shader.get(); - if (cpu_addr) { - Register(std::move(shader), *cpu_addr, code_size); - } else { - null_shader = std::move(shader); - } - - return last_shaders[static_cast(program)] = result; -} - -Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { - const std::optional cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; - - if (Shader* const kernel = cpu_addr ? 
TryGet(*cpu_addr) : null_kernel.get()) { - return kernel; - } - - // No kernel found, create a new one - const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; - ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; - const std::size_t code_size{code.size() * sizeof(u64)}; - const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - - const ShaderParameters params{gpu, kepler_compute, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; - - std::unique_ptr kernel; - const auto found = runtime_cache.find(unique_identifier); - if (found == runtime_cache.end()) { - kernel = Shader::CreateKernelFromMemory(params, std::move(code)); - } else { - kernel = Shader::CreateFromCache(params, found->second); - } - - Shader* const result = kernel.get(); - if (cpu_addr) { - Register(std::move(kernel), *cpu_addr, code_size); - } else { - null_kernel = std::move(kernel); - } - return result; -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b30308b6f..ad3d15a76 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -19,10 +19,6 @@ #include "common/common_types.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -33,10 +29,6 @@ namespace Core::Frontend { class EmuWindow; } -namespace VideoCommon::Shader { -class AsyncShaders; -} - namespace OpenGL { class Device; @@ -44,77 +36,10 @@ class RasterizerOpenGL; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct ProgramHandle { - OGLProgram source_program; - OGLAssemblyProgram assembly_program; -}; -using 
ProgramSharedPtr = std::shared_ptr; - -struct PrecompiledShader { - ProgramSharedPtr program; - std::shared_ptr registry; - ShaderEntries entries; -}; - -struct ShaderParameters { - Tegra::GPU& gpu; - Tegra::Engines::ConstBufferEngineInterface& engine; - ShaderDiskCacheOpenGL& disk_cache; - const Device& device; - VAddr cpu_addr; - const u8* host_ptr; - u64 unique_identifier; -}; - -ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, - u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - bool hint_retrievable = false); - -class Shader final { +class Shader { public: + explicit Shader(); ~Shader(); - - /// Gets the GL program handle for the shader - GLuint GetHandle() const; - - bool IsBuilt() const; - - /// Gets the shader entries for the shader - const ShaderEntries& GetEntries() const { - return entries; - } - - const VideoCommon::Shader::Registry& GetRegistry() const { - return *registry; - } - - /// Mark a OpenGL shader as built - void AsyncOpenGLBuilt(OGLProgram new_program); - - /// Mark a GLASM shader as built - void AsyncGLASMBuilt(OGLAssemblyProgram new_program); - - static std::unique_ptr CreateStageFromMemory( - const ShaderParameters& params, Maxwell::ShaderProgram program_type, - ProgramCode program_code, ProgramCode program_code_b, - VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); - - static std::unique_ptr CreateKernelFromMemory(const ShaderParameters& params, - ProgramCode code); - - static std::unique_ptr CreateFromCache(const ShaderParameters& params, - const PrecompiledShader& precompiled_shader); - -private: - explicit Shader(std::shared_ptr registry, ShaderEntries entries, - ProgramSharedPtr program, bool is_built_ = true); - - std::shared_ptr registry; - ShaderEntries entries; - ProgramSharedPtr program; - GLuint handle = 0; - bool is_built{}; }; class ShaderCacheOpenGL final : public VideoCommon::ShaderCache { @@ -126,36 
+51,13 @@ public: Tegra::MemoryManager& gpu_memory_, const Device& device_); ~ShaderCacheOpenGL() override; - /// Loads disk cache for the current game - void LoadDiskCache(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback); - - /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program, - VideoCommon::Shader::AsyncShaders& async_shaders); - - /// Gets a compute kernel in the passed address - Shader* GetComputeKernel(GPUVAddr code_addr); - private: - ProgramSharedPtr GeneratePrecompiledProgram( - const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, - const std::unordered_set& supported_formats); - Core::Frontend::EmuWindow& emu_window; Tegra::GPU& gpu; Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; - - ShaderDiskCacheOpenGL disk_cache; - std::unordered_map runtime_cache; - - std::unique_ptr null_shader; - std::unique_ptr null_kernel; - - std::array last_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp deleted file mode 100644 index 9c28498e8..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ /dev/null @@ -1,2986 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/common_types.h" -#include "common/div_ceil.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/shader/ast.h" -#include "video_core/shader/node.h" -#include "video_core/shader/shader_ir.h" -#include "video_core/shader/transform_feedback.h" - -namespace OpenGL { - -namespace { - -using Tegra::Engines::ShaderType; -using Tegra::Shader::Attribute; -using Tegra::Shader::Header; -using Tegra::Shader::IpaInterpMode; -using Tegra::Shader::IpaMode; -using Tegra::Shader::IpaSampleMode; -using Tegra::Shader::PixelImap; -using Tegra::Shader::Register; -using Tegra::Shader::TextureType; - -using namespace VideoCommon::Shader; -using namespace std::string_literals; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using Operation = const OperationNode&; - -class ASTDecompiler; -class ExprDecompiler; - -enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; - -constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; - -constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; -constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; - -struct TextureOffset {}; -struct TextureDerivates {}; -using TextureArgument = std::pair; -using TextureIR = std::variant; - -constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast(Maxwell::MaxConstBufferSize) / sizeof(u32); -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); - -constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt -#define ftou floatBitsToUint -#define itof intBitsToFloat -#define utof uintBitsToFloat - -bvec2 
HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ - bvec2 is_nan1 = isnan(pair1); - bvec2 is_nan2 = isnan(pair2); - return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); -}} - -const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); -const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); -)"; - -class ShaderWriter final { -public: - void AddExpression(std::string_view text) { - DEBUG_ASSERT(scope >= 0); - if (!text.empty()) { - AppendIndentation(); - } - shader_source += text; - } - - // Forwards all arguments directly to libfmt. - // Note that all formatting requirements for fmt must be - // obeyed when using this function. (e.g. {{ must be used - // printing the character '{' is desirable. Ditto for }} and '}', - // etc). - template - void AddLine(std::string_view text, Args&&... args) { - AddExpression(fmt::format(fmt::runtime(text), std::forward(args)...)); - AddNewLine(); - } - - void AddNewLine() { - DEBUG_ASSERT(scope >= 0); - shader_source += '\n'; - } - - std::string GenerateTemporary() { - return fmt::format("tmp{}", temporary_index++); - } - - std::string GetResult() { - return std::move(shader_source); - } - - s32 scope = 0; - -private: - void AppendIndentation() { - shader_source.append(static_cast(scope) * 4, ' '); - } - - std::string shader_source; - u32 temporary_index = 1; -}; - -class Expression final { -public: - Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} { - ASSERT(type != Type::Void); - } - Expression() : type{Type::Void} {} - - Type GetType() const { - return type; - } - - std::string GetCode() const { - return code; - } - - void CheckVoid() const { - ASSERT(type == Type::Void); - } - - std::string As(Type type_) const { - switch (type_) { - case Type::Bool: - return AsBool(); - case Type::Bool2: - return AsBool2(); - case Type::Float: - return AsFloat(); - case Type::Int: - return AsInt(); - case Type::Uint: - 
return AsUint(); - case Type::HalfFloat: - return AsHalfFloat(); - default: - UNREACHABLE_MSG("Invalid type"); - return code; - } - } - - std::string AsBool() const { - switch (type) { - case Type::Bool: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsBool2() const { - switch (type) { - case Type::Bool2: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsFloat() const { - switch (type) { - case Type::Float: - return code; - case Type::Uint: - return fmt::format("utof({})", code); - case Type::Int: - return fmt::format("itof({})", code); - case Type::HalfFloat: - return fmt::format("utof(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsInt() const { - switch (type) { - case Type::Float: - return fmt::format("ftoi({})", code); - case Type::Uint: - return fmt::format("int({})", code); - case Type::Int: - return code; - case Type::HalfFloat: - return fmt::format("int(packHalf2x16({}))", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsUint() const { - switch (type) { - case Type::Float: - return fmt::format("ftou({})", code); - case Type::Uint: - return code; - case Type::Int: - return fmt::format("uint({})", code); - case Type::HalfFloat: - return fmt::format("packHalf2x16({})", code); - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - - std::string AsHalfFloat() const { - switch (type) { - case Type::Float: - return fmt::format("unpackHalf2x16(ftou({}))", code); - case Type::Uint: - return fmt::format("unpackHalf2x16({})", code); - case Type::Int: - return fmt::format("unpackHalf2x16(int({}))", code); - case Type::HalfFloat: - return code; - default: - UNREACHABLE_MSG("Incompatible types"); - return code; - } - } - -private: - std::string code; - Type type{}; -}; - -const char* GetTypeString(Type type) { - 
switch (type) { - case Type::Bool: - return "bool"; - case Type::Bool2: - return "bvec2"; - case Type::Float: - return "float"; - case Type::Int: - return "int"; - case Type::Uint: - return "uint"; - case Type::HalfFloat: - return "vec2"; - default: - UNREACHABLE_MSG("Invalid type"); - return ""; - } -} - -const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { - switch (image_type) { - case Tegra::Shader::ImageType::Texture1D: - return "1D"; - case Tegra::Shader::ImageType::TextureBuffer: - return "Buffer"; - case Tegra::Shader::ImageType::Texture1DArray: - return "1DArray"; - case Tegra::Shader::ImageType::Texture2D: - return "2D"; - case Tegra::Shader::ImageType::Texture2DArray: - return "2DArray"; - case Tegra::Shader::ImageType::Texture3D: - return "3D"; - default: - UNREACHABLE(); - return "1D"; - } -} - -/// Describes primitive behavior on geometry shaders -std::pair GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { - switch (topology) { - case Maxwell::PrimitiveTopology::Points: - return {"points", 1}; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineStrip: - return {"lines", 2}; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - return {"lines_adjacency", 4}; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - case Maxwell::PrimitiveTopology::TriangleFan: - return {"triangles", 3}; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - return {"triangles_adjacency", 6}; - default: - UNIMPLEMENTED_MSG("topology={}", topology); - return {"points", 1}; - } -} - -/// Generates code to use for a swizzle operation. 
-constexpr const char* GetSwizzle(std::size_t element) { - constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; - return swizzle.at(element); -} - -constexpr const char* GetColorSwizzle(std::size_t element) { - constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; - return swizzle.at(element); -} - -/// Translate topology -std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { - switch (topology) { - case Tegra::Shader::OutputTopology::PointList: - return "points"; - case Tegra::Shader::OutputTopology::LineStrip: - return "line_strip"; - case Tegra::Shader::OutputTopology::TriangleStrip: - return "triangle_strip"; - default: - UNIMPLEMENTED_MSG("Unknown output topology: {}", topology); - return "points"; - } -} - -/// Returns true if an object has to be treated as precise -bool IsPrecise(Operation operand) { - const auto& meta{operand.GetMeta()}; - if (const auto arithmetic = std::get_if(&meta)) { - return arithmetic->precise; - } - return false; -} - -bool IsPrecise(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - return IsPrecise(*operation); - } - return false; -} - -constexpr bool IsGenericAttribute(Attribute::Index index) { - return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; -} - -constexpr bool IsLegacyTexCoord(Attribute::Index index) { - return static_cast(index) >= static_cast(Attribute::Index::TexCoord_0) && - static_cast(index) <= static_cast(Attribute::Index::TexCoord_7); -} - -constexpr Attribute::Index ToGenericAttribute(u64 value) { - return static_cast(value + static_cast(Attribute::Index::Attribute_0)); -} - -constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { - return static_cast(index) - static_cast(Attribute::Index::TexCoord_0); -} - -u32 GetGenericAttributeIndex(Attribute::Index index) { - ASSERT(IsGenericAttribute(index)); - return static_cast(index) - static_cast(Attribute::Index::Attribute_0); -} - -constexpr const char* 
GetFlowStackPrefix(MetaStackClass stack) { - switch (stack) { - case MetaStackClass::Ssy: - return "ssy"; - case MetaStackClass::Pbk: - return "pbk"; - } - return {}; -} - -std::string FlowStackName(MetaStackClass stack) { - return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); -} - -std::string FlowStackTopName(MetaStackClass stack) { - return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); -} - -struct GenericVaryingDescription { - std::string name; - u8 first_element = 0; - bool is_scalar = false; -}; - -class GLSLDecompiler final { -public: - explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, - ShaderType stage_, std::string_view identifier_, - std::string_view suffix_) - : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, - identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { - if (stage != ShaderType::Compute) { - transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); - } - } - - void Decompile() { - DeclareHeader(); - DeclareVertex(); - DeclareGeometry(); - DeclareFragment(); - DeclareCompute(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareImages(); - DeclareSamplers(); - DeclareGlobalMemory(); - DeclareConstantBuffers(); - DeclareLocalMemory(); - DeclareRegisters(); - DeclarePredicates(); - DeclareInternalFlags(); - DeclareCustomVariables(); - DeclarePhysicalAttributeReader(); - - code.AddLine("void main() {{"); - ++code.scope; - - if (stage == ShaderType::Vertex) { - code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - } - - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); - } - - --code.scope; - code.AddLine("}}"); - } - - std::string GetResult() { - return code.GetResult(); - } - -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - - void DecompileBranchMode() { - // VM's program counter - const auto first_address = ir.GetBasicBlocks().begin()->first; - 
code.AddLine("uint jmp_to = {}U;", first_address); - - // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems - // unlikely that shaders will use 20 nested SSYs and PBKs. - constexpr u32 FLOW_STACK_SIZE = 20; - if (!ir.IsFlowStackDisabled()) { - for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { - code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); - code.AddLine("uint {} = 0U;", FlowStackTopName(stack)); - } - } - - code.AddLine("while (true) {{"); - ++code.scope; - - code.AddLine("switch (jmp_to) {{"); - - for (const auto& pair : ir.GetBasicBlocks()) { - const auto& [address, bb] = pair; - code.AddLine("case 0x{:X}U: {{", address); - ++code.scope; - - VisitBlock(bb); - - --code.scope; - code.AddLine("}}"); - } - - code.AddLine("default: return;"); - code.AddLine("}}"); - - --code.scope; - code.AddLine("}}"); - } - - void DecompileAST(); - - void DeclareHeader() { - if (!identifier.empty()) { - code.AddLine("// {}", identifier); - } - const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); - code.AddLine("#version 440 {}", use_compatibility ? 
"compatibility" : "core"); - code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); - if (device.HasShaderBallot()) { - code.AddLine("#extension GL_ARB_shader_ballot : require"); - } - if (device.HasVertexViewportLayer()) { - code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); - } - if (device.HasImageLoadFormatted()) { - code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); - } - if (device.HasTextureShadowLod()) { - code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); - } - if (device.HasWarpIntrinsics()) { - code.AddLine("#extension GL_NV_gpu_shader5 : require"); - code.AddLine("#extension GL_NV_shader_thread_group : require"); - code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); - } - // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 - // operations) on places where we don't want to. - // Thanks to Ryujinx for finding this workaround. - code.AddLine("#pragma optionNV(fastmath off)"); - - code.AddNewLine(); - - code.AddLine(COMMON_DECLARATIONS); - } - - void DeclareVertex() { - if (stage != ShaderType::Vertex) { - return; - } - - DeclareVertexRedeclarations(); - } - - void DeclareGeometry() { - if (stage != ShaderType::Geometry) { - return; - } - - const auto& info = registry.GetGraphicsInfo(); - const auto input_topology = info.primitive_topology; - const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); - max_input_vertices = max_vertices; - code.AddLine("layout ({}) in;", glsl_topology); - - const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_output_vertices = header.common4.max_output_vertices.Value(); - code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); - code.AddNewLine(); - - code.AddLine("in gl_PerVertex {{"); - ++code.scope; - code.AddLine("vec4 gl_Position;"); - --code.scope; - code.AddLine("}} gl_in[];"); - - DeclareVertexRedeclarations(); - } - 
- void DeclareFragment() { - if (stage != ShaderType::Fragment) { - return; - } - if (ir.UsesLegacyVaryings()) { - code.AddLine("in gl_PerFragment {{"); - ++code.scope; - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_Color;"); - code.AddLine("vec4 gl_SecondaryColor;"); - --code.scope; - code.AddLine("}};"); - } - - for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); - } - } - - void DeclareCompute() { - if (stage != ShaderType::Compute) { - return; - } - const auto& info = registry.GetComputeInfo(); - if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { - const u32 limit = device.GetMaxComputeSharedMemorySize(); - if (size > limit) { - LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", - size, limit); - size = limit; - } - - code.AddLine("shared uint smem[{}];", size / 4); - code.AddNewLine(); - } - code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", - info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); - code.AddNewLine(); - } - - void DeclareVertexRedeclarations() { - code.AddLine("out gl_PerVertex {{"); - ++code.scope; - - auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); - if (!pos_xfb.empty()) { - pos_xfb = fmt::format("layout ({}) ", pos_xfb); - } - const char* pos_type = - FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); - code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); - - for (const auto attribute : ir.GetOutputAttributes()) { - if (attribute == Attribute::Index::ClipDistances0123 || - attribute == Attribute::Index::ClipDistances4567) { - code.AddLine("float gl_ClipDistance[];"); - break; - } - } - - if (stage != ShaderType::Geometry && - (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { - if (ir.UsesLayer()) { - code.AddLine("int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - 
code.AddLine("int gl_ViewportIndex;"); - } - } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && - !device.HasVertexViewportLayer()) { - LOG_ERROR( - Render_OpenGL, - "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); - } - - if (ir.UsesPointSize()) { - code.AddLine("float gl_PointSize;"); - } - - if (ir.UsesLegacyVaryings()) { - code.AddLine("vec4 gl_TexCoord[8];"); - code.AddLine("vec4 gl_FrontColor;"); - code.AddLine("vec4 gl_FrontSecondaryColor;"); - code.AddLine("vec4 gl_BackColor;"); - code.AddLine("vec4 gl_BackSecondaryColor;"); - } - - --code.scope; - code.AddLine("}};"); - code.AddNewLine(); - - if (stage == ShaderType::Geometry) { - if (ir.UsesLayer()) { - code.AddLine("out int gl_Layer;"); - } - if (ir.UsesViewportIndex()) { - code.AddLine("out int gl_ViewportIndex;"); - } - } - code.AddNewLine(); - } - - void DeclareRegisters() { - const auto& registers = ir.GetRegisters(); - for (const u32 gpr : registers) { - code.AddLine("float {} = 0.0f;", GetRegister(gpr)); - } - if (!registers.empty()) { - code.AddNewLine(); - } - } - - void DeclareCustomVariables() { - const u32 num_custom_variables = ir.GetNumCustomVariables(); - for (u32 i = 0; i < num_custom_variables; ++i) { - code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); - } - if (num_custom_variables > 0) { - code.AddNewLine(); - } - } - - void DeclarePredicates() { - const auto& predicates = ir.GetPredicates(); - for (const auto pred : predicates) { - code.AddLine("bool {} = false;", GetPredicate(pred)); - } - if (!predicates.empty()) { - code.AddNewLine(); - } - } - - void DeclareLocalMemory() { - u64 local_memory_size = 0; - if (stage == ShaderType::Compute) { - local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; - } else { - local_memory_size = header.GetLocalMemorySize(); - } - if (local_memory_size == 0) { - return; - } - const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; - 
code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); - code.AddNewLine(); - } - - void DeclareInternalFlags() { - for (u32 flag = 0; flag < static_cast(InternalFlag::Amount); flag++) { - const auto flag_code = static_cast(flag); - code.AddLine("bool {} = false;", GetInternalFlag(flag_code)); - } - code.AddNewLine(); - } - - const char* GetInputFlags(PixelImap attribute) { - switch (attribute) { - case PixelImap::Perspective: - return "smooth"; - case PixelImap::Constant: - return "flat"; - case PixelImap::ScreenLinear: - return "noperspective"; - case PixelImap::Unused: - break; - } - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute); - return {}; - } - - void DeclareInputAttributes() { - if (ir.HasPhysicalAttributes()) { - const u32 num_inputs{GetNumPhysicalInputAttributes()}; - for (u32 i = 0; i < num_inputs; ++i) { - DeclareInputAttribute(ToGenericAttribute(i), true); - } - code.AddNewLine(); - return; - } - - const auto& attributes = ir.GetInputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareInputAttribute(index, false); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 location{GetGenericAttributeIndex(index)}; - - std::string name{GetGenericInputAttribute(index)}; - if (stage == ShaderType::Geometry) { - name = "gs_" + name + "[]"; - } - - std::string suffix_; - if (stage == ShaderType::Fragment) { - const auto input_mode{header.ps.GetPixelImap(location)}; - if (input_mode == PixelImap::Unused) { - return; - } - suffix_ = GetInputFlags(input_mode); - } - - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name); - } - - void DeclareOutputAttributes() { - if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { - for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { - DeclareOutputAttribute(ToGenericAttribute(i)); - } - code.AddNewLine(); - return; - } - 
- const auto& attributes = ir.GetOutputAttributes(); - for (const auto index : attributes) { - if (IsGenericAttribute(index)) { - DeclareOutputAttribute(index); - } - } - if (!attributes.empty()) { - code.AddNewLine(); - } - } - - std::optional GetNumComponents(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return std::nullopt; - } - return it->second.components; - } - - std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { - const u8 location = static_cast(static_cast(index) * 4 + element); - const auto it = transform_feedback.find(location); - if (it == transform_feedback.end()) { - return {}; - } - - const VaryingTFB& tfb = it->second; - return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, - tfb.offset, tfb.stride); - } - - void DeclareOutputAttribute(Attribute::Index index) { - static constexpr std::string_view swizzle = "xyzw"; - u8 element = 0; - while (element < 4) { - auto xfb = GetTransformFeedbackDecoration(index, element); - if (!xfb.empty()) { - xfb = fmt::format(", {}", xfb); - } - const std::size_t remainder = 4 - element; - const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); - const char* const type = FLOAT_TYPES.at(num_components - 1); - - const u32 location = GetGenericAttributeIndex(index); - - GenericVaryingDescription description; - description.first_element = static_cast(element); - description.is_scalar = num_components == 1; - description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); - if (element != 0 || num_components != 4) { - const std::string_view name_swizzle = swizzle.substr(element, num_components); - description.name = fmt::format("{}_{}", description.name, name_swizzle); - } - for (std::size_t i = 0; i < num_components; ++i) { - const u8 offset = 
static_cast(location * 4 + element + i); - varying_description.insert({offset, description}); - } - - code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, - xfb, type, description.name); - - element = static_cast(static_cast(element) + num_components); - } - } - - void DeclareConstantBuffers() { - u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, info] : ir.GetConstantBuffers()) { - const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); - const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; - code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, - GetConstBufferBlock(index)); - code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareGlobalMemory() { - u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - // Since we don't know how the shader will use the shader, hint the driver to disable as - // much optimizations as possible - std::string qualifier = "coherent volatile"; - if (usage.is_read && !usage.is_written) { - qualifier += " readonly"; - } else if (usage.is_written && !usage.is_read) { - qualifier += " writeonly"; - } - - code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, - GetGlobalMemoryBlock(base)); - code.AddLine(" uint {}[];", GetGlobalMemory(base)); - code.AddLine("}};"); - code.AddNewLine(); - } - } - - void DeclareSamplers() { - u32 binding = device.GetBaseBindings(stage).sampler; - for (const auto& sampler : ir.GetSamplers()) { - const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding); - binding += sampler.is_indexed ? 
sampler.size : 1; - - std::string sampler_type = [&]() { - if (sampler.is_buffer) { - return "samplerBuffer"; - } - switch (sampler.type) { - case TextureType::Texture1D: - return "sampler1D"; - case TextureType::Texture2D: - return "sampler2D"; - case TextureType::Texture3D: - return "sampler3D"; - case TextureType::TextureCube: - return "samplerCube"; - default: - UNREACHABLE(); - return "sampler2D"; - } - }(); - if (sampler.is_array) { - sampler_type += "Array"; - } - if (sampler.is_shadow) { - sampler_type += "Shadow"; - } - - if (!sampler.is_indexed) { - code.AddLine("{} {} {};", description, sampler_type, name); - } else { - code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size); - } - } - if (!ir.GetSamplers().empty()) { - code.AddNewLine(); - } - } - - void DeclarePhysicalAttributeReader() { - if (!ir.HasPhysicalAttributes()) { - return; - } - code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{"); - ++code.scope; - code.AddLine("switch (physical_address) {{"); - - // Just declare generic attributes for now. - const auto num_attributes{static_cast(GetNumPhysicalInputAttributes())}; - for (u32 index = 0; index < num_attributes; ++index) { - const auto attribute{ToGenericAttribute(index)}; - for (u32 element = 0; element < 4; ++element) { - constexpr u32 generic_base = 0x80; - constexpr u32 generic_stride = 16; - constexpr u32 element_stride = 4; - const u32 address{generic_base + index * generic_stride + element * element_stride}; - - const bool declared = stage != ShaderType::Fragment || - header.ps.GetPixelImap(index) != PixelImap::Unused; - const std::string value = - declared ? 
ReadAttribute(attribute, element).AsFloat() : "0.0f"; - code.AddLine("case 0x{:X}U: return {};", address, value); - } - } - - code.AddLine("default: return 0;"); - - code.AddLine("}}"); - --code.scope; - code.AddLine("}}"); - code.AddNewLine(); - } - - void DeclareImages() { - u32 binding = device.GetBaseBindings(stage).image; - for (const auto& image : ir.GetImages()) { - std::string qualifier = "coherent volatile"; - if (image.is_read && !image.is_written) { - qualifier += " readonly"; - } else if (image.is_written && !image.is_read) { - qualifier += " writeonly"; - } - - const char* format = image.is_atomic ? "r32ui, " : ""; - const char* type_declaration = GetImageTypeDeclaration(image.type); - code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, - qualifier, type_declaration, GetImage(image)); - } - if (!ir.GetImages().empty()) { - code.AddNewLine(); - } - } - - void VisitBlock(const NodeBlock& bb) { - for (const auto& node : bb) { - Visit(node).CheckVoid(); - } - } - - Expression Visit(const Node& node) { - if (const auto operation = std::get_if(&*node)) { - if (const auto amend_index = operation->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - const auto operation_index = static_cast(operation->GetCode()); - if (operation_index >= operation_decompilers.size()) { - UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); - return {}; - } - const auto decompiler = operation_decompilers[operation_index]; - if (decompiler == nullptr) { - UNREACHABLE_MSG("Undefined operation: {}", operation_index); - return {}; - } - return (this->*decompiler)(*operation); - } - - if (const auto gpr = std::get_if(&*node)) { - const u32 index = gpr->GetIndex(); - if (index == Register::ZeroIndex) { - return {"0U", Type::Uint}; - } - return {GetRegister(index), Type::Float}; - } - - if (const auto cv = std::get_if(&*node)) { - const u32 index = cv->GetIndex(); - return {GetCustomVariable(index), Type::Float}; - } - 
- if (const auto immediate = std::get_if(&*node)) { - const u32 value = immediate->GetValue(); - if (value < 10) { - // For eyecandy avoid using hex numbers on single digits - return {fmt::format("{}U", immediate->GetValue()), Type::Uint}; - } - return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint}; - } - - if (const auto predicate = std::get_if(&*node)) { - const auto value = [&]() -> std::string { - switch (const auto index = predicate->GetIndex(); index) { - case Tegra::Shader::Pred::UnusedIndex: - return "true"; - case Tegra::Shader::Pred::NeverExecute: - return "false"; - default: - return GetPredicate(index); - } - }(); - if (predicate->IsNegated()) { - return {fmt::format("!({})", value), Type::Bool}; - } - return {value, Type::Bool}; - } - - if (const auto abuf = std::get_if(&*node)) { - UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, - "Physical attributes in geometry shaders are not implemented"); - if (abuf->IsPhysicalBuffer()) { - return {fmt::format("ReadPhysicalAttribute({})", - Visit(abuf->GetPhysicalAddress()).AsUint()), - Type::Float}; - } - return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); - } - - if (const auto cbuf = std::get_if(&*node)) { - const Node offset = cbuf->GetOffset(); - - if (const auto immediate = std::get_if(&*offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); - return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), - offset_imm / (4 * 4), (offset_imm / 4) % 4), - Type::Uint}; - } - - // Indirect access - const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); - - if (!device.HasComponentIndexingBug()) { - return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset), - Type::Uint}; - } - - // AMD's proprietary GLSL compiler emits 
ill code for variable component access. - // To bypass this driver bug generate 4 ifs, one per each component. - const std::string pack = code.GenerateTemporary(); - code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), - final_offset); - - const std::string result = code.GenerateTemporary(); - code.AddLine("uint {};", result); - for (u32 swizzle = 0; swizzle < 4; ++swizzle) { - code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, - GetSwizzle(swizzle)); - } - return {result, Type::Uint}; - } - - if (const auto gmem = std::get_if(&*node)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } - - if (const auto lmem = std::get_if(&*node)) { - return { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } - - if (const auto smem = std::get_if(&*node)) { - return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } - - if (const auto internal_flag = std::get_if(&*node)) { - return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; - } - - if (const auto conditional = std::get_if(&*node)) { - if (const auto amend_index = conditional->GetAmendIndex()) { - Visit(ir.GetAmendNode(*amend_index)).CheckVoid(); - } - // It's invalid to call conditional on nested nodes, use an operation instead - code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool()); - ++code.scope; - - VisitBlock(conditional->GetCode()); - - --code.scope; - code.AddLine("}}"); - return {}; - } - - if (const auto comment = std::get_if(&*node)) { - code.AddLine("// " + comment->GetText()); - return {}; - } - - UNREACHABLE(); - return {}; - } - - Expression ReadAttribute(Attribute::Index 
attribute, u32 element, const Node& buffer = {}) { - const auto GeometryPass = [&](std::string_view name) { - if (stage == ShaderType::Geometry && buffer) { - // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games - // set an 0x80000000 index for those and the shader fails to build. Find out why - // this happens and what's its intent. - return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), - max_input_vertices.value()); - } - return std::string(name); - }; - - switch (attribute) { - case Attribute::Index::Position: - switch (stage) { - case ShaderType::Geometry: - return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), - GetSwizzle(element)), - Type::Float}; - case ShaderType::Fragment: - return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; - default: - UNREACHABLE(); - return {"0", Type::Int}; - } - case Attribute::Index::FrontColor: - return {"gl_Color"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::FrontSecondaryColor: - return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return {"gl_PointCoord.x", Type::Float}; - case 1: - return {"gl_PointCoord.y", Type::Float}; - case 2: - case 3: - return {"0.0f", Type::Float}; - } - UNREACHABLE(); - return {"0", Type::Int}; - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - ASSERT(stage == ShaderType::Vertex); - switch (element) { - case 2: - // Config pack's first value is instance_id. 
- return {"gl_InstanceID", Type::Int}; - case 3: - return {"gl_VertexID", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return {"0", Type::Int}; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderType::Fragment); - switch (element) { - case 3: - return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return {"0", Type::Int}; - default: - if (IsGenericAttribute(attribute)) { - return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), - Type::Float}; - } - if (IsLegacyTexCoord(attribute)) { - UNIMPLEMENTED_IF(stage == ShaderType::Geometry); - return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}; - } - break; - } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); - return {"0", Type::Int}; - } - - Expression ApplyPrecise(Operation operation, std::string value, Type type) { - if (!IsPrecise(operation)) { - return {std::move(value), type}; - } - // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to - // be found in fragment shaders, so we disable precise there. There are vertex shaders that - // also fail to build but nobody seems to care about those. - // Note: Only bugged drivers will skip precise. - const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; - - std::string temporary = code.GenerateTemporary(); - code.AddLine("{}{} {} = {};", disable_precise ? 
"" : "precise ", GetTypeString(type), - temporary, value); - return {std::move(temporary), type}; - } - - Expression VisitOperand(Operation operation, std::size_t operand_index) { - const auto& operand = operation[operand_index]; - const bool parent_precise = IsPrecise(operation); - const bool child_precise = IsPrecise(operand); - const bool child_trivial = !std::holds_alternative(*operand); - if (!parent_precise || child_precise || child_trivial) { - return Visit(operand); - } - - Expression value = Visit(operand); - std::string temporary = code.GenerateTemporary(); - code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode()); - return {std::move(temporary), value.GetType()}; - } - - std::optional GetOutputAttribute(const AbufNode* abuf) { - const u32 element = abuf->GetElement(); - switch (const auto attribute = abuf->GetIndex()) { - case Attribute::Index::Position: - return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::LayerViewportPointSize: - switch (element) { - case 0: - UNIMPLEMENTED(); - return std::nullopt; - case 1: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_Layer", Type::Int}}; - case 2: - if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return std::nullopt; - } - return {{"gl_ViewportIndex", Type::Int}}; - case 3: - return {{"gl_PointSize", Type::Float}}; - } - return std::nullopt; - case Attribute::Index::FrontColor: - return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::FrontSecondaryColor: - return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackColor: - return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::BackSecondaryColor: - return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; - case Attribute::Index::ClipDistances0123: - return 
{{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; - case Attribute::Index::ClipDistances4567: - return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; - default: - if (IsGenericAttribute(attribute)) { - return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; - } - if (IsLegacyTexCoord(attribute)) { - return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), - GetSwizzle(element)), - Type::Float}}; - } - UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute); - return std::nullopt; - } - } - - Expression GenerateUnary(Operation operation, std::string_view func, Type result_type, - Type type_a) { - std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a)); - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("({} {} {})", op_a, func, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateTernary(Operation operation, std::string_view func, Type result_type, - Type type_a, Type type_b, Type type_c) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - std::string op_str = 
fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type, - Type type_a, Type type_b, Type type_c, Type type_d) { - const std::string op_a = VisitOperand(operation, 0).As(type_a); - const std::string op_b = VisitOperand(operation, 1).As(type_b); - const std::string op_c = VisitOperand(operation, 2).As(type_c); - const std::string op_d = VisitOperand(operation, 3).As(type_d); - std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d); - - return ApplyPrecise(operation, std::move(op_str), result_type); - } - - std::string GenerateTexture(Operation operation, const std::string& function_suffix, - const std::vector& extras, bool separate_dc = false) { - constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"}; - - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - const std::size_t count = operation.GetOperandsCount(); - const bool has_array = meta->sampler.is_array; - const bool has_shadow = meta->sampler.is_shadow; - const bool workaround_lod_array_shadow_as_grad = - !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube); - - std::string expr = "texture"; - - if (workaround_lod_array_shadow_as_grad) { - expr += "Grad"; - } else { - expr += function_suffix; - } - - if (!meta->aoffi.empty()) { - expr += "Offset"; - } else if (!meta->ptp.empty()) { - expr += "Offsets"; - } - if (!meta->sampler.is_indexed) { - expr += '(' + GetSampler(meta->sampler) + ", "; - } else { - expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; - } - expr += coord_constructors.at(count + (has_array ? 1 : 0) + - (has_shadow && !separate_dc ? 
1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - expr += Visit(operation[i]).AsFloat(); - - const std::size_t next = i + 1; - if (next < count) - expr += ", "; - } - if (has_array) { - expr += ", float(" + Visit(meta->array).AsInt() + ')'; - } - if (has_shadow) { - if (separate_dc) { - expr += "), " + Visit(meta->depth_compare).AsFloat(); - } else { - expr += ", " + Visit(meta->depth_compare).AsFloat() + ')'; - } - } else { - expr += ')'; - } - - if (workaround_lod_array_shadow_as_grad) { - switch (meta->sampler.type) { - case TextureType::Texture2D: - return expr + ", vec2(0.0), vec2(0.0))"; - case TextureType::TextureCube: - return expr + ", vec3(0.0), vec3(0.0))"; - default: - UNREACHABLE(); - break; - } - } - - for (const auto& variant : extras) { - if (const auto argument = std::get_if(&variant)) { - expr += GenerateTextureArgument(*argument); - } else if (std::holds_alternative(variant)) { - if (!meta->aoffi.empty()) { - expr += GenerateTextureAoffi(meta->aoffi); - } else if (!meta->ptp.empty()) { - expr += GenerateTexturePtp(meta->ptp); - } - } else if (std::holds_alternative(variant)) { - expr += GenerateTextureDerivates(meta->derivates); - } else { - UNREACHABLE(); - } - } - - return expr + ')'; - } - - std::string GenerateTextureArgument(const TextureArgument& argument) { - const auto& [type, operand] = argument; - if (operand == nullptr) { - return {}; - } - - std::string expr = ", "; - switch (type) { - case Type::Int: - if (const auto immediate = std::get_if(&*operand)) { - // Inline the string as an immediate integer in GLSL (some extra arguments are - // required to be constant) - expr += std::to_string(static_cast(immediate->GetValue())); - } else { - expr += Visit(operand).AsInt(); - } - break; - case Type::Float: - expr += Visit(operand).AsFloat(); - break; - default: { - const auto type_int = static_cast(type); - UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); - expr += '0'; - break; - } - } - return expr; - } 
- - std::string ReadTextureOffset(const Node& value) { - if (const auto immediate = std::get_if(&*value)) { - // Inline the string as an immediate integer in GLSL (AOFFI arguments are required - // to be constant by the standard). - return std::to_string(static_cast(immediate->GetValue())); - } else if (device.HasVariableAoffi()) { - // Avoid using variable AOFFI on unsupported devices. - return Visit(value).AsInt(); - } else { - // Insert 0 on devices not supporting variable AOFFI. - return "0"; - } - } - - std::string GenerateTextureAoffi(const std::vector& aoffi) { - if (aoffi.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"}; - std::string expr = ", "; - expr += coord_constructors.at(aoffi.size() - 1); - expr += '('; - - for (std::size_t index = 0; index < aoffi.size(); ++index) { - expr += ReadTextureOffset(aoffi.at(index)); - if (index + 1 < aoffi.size()) { - expr += ", "; - } - } - expr += ')'; - - return expr; - } - - std::string GenerateTexturePtp(const std::vector& ptp) { - static constexpr std::size_t num_vectors = 4; - ASSERT(ptp.size() == num_vectors * 2); - - std::string expr = ", ivec2[]("; - for (std::size_t vector = 0; vector < num_vectors; ++vector) { - const bool has_next = vector + 1 < num_vectors; - expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), - ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? 
", " : ""); - } - expr += ')'; - return expr; - } - - std::string GenerateTextureDerivates(const std::vector& derivates) { - if (derivates.empty()) { - return {}; - } - constexpr std::array coord_constructors = {"float", "vec2", "vec3"}; - std::string expr = ", "; - const std::size_t components = derivates.size() / 2; - std::string dx = coord_constructors.at(components - 1); - std::string dy = coord_constructors.at(components - 1); - dx += '('; - dy += '('; - - for (std::size_t index = 0; index < components; ++index) { - const auto& operand_x{derivates.at(index * 2)}; - const auto& operand_y{derivates.at(index * 2 + 1)}; - dx += Visit(operand_x).AsFloat(); - dy += Visit(operand_y).AsFloat(); - - if (index + 1 < components) { - dx += ", "; - dy += ", "; - } - } - dx += ')'; - dy += ')'; - expr += dx + ", " + dy; - - return expr; - } - - std::string BuildIntegerCoordinates(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; - const std::size_t coords_count{operation.GetOperandsCount()}; - std::string expr = constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - std::string BuildImageValues(Operation operation) { - constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; - const auto& meta{std::get(operation.GetMeta())}; - - const std::size_t values_count{meta.values.size()}; - std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsUint(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += ')'; - return expr; - } - - Expression Assign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - Expression target; - if (const auto gpr = std::get_if(&*dest)) { - if (gpr->GetIndex() == 
Register::ZeroIndex) { - // Writing to Register::ZeroIndex is a no op but we still have to visit the source - // as it might have side effects. - code.AddLine("{};", Visit(src).GetCode()); - return {}; - } - target = {GetRegister(gpr->GetIndex()), Type::Float}; - } else if (const auto abuf = std::get_if(&*dest)) { - UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - auto output = GetOutputAttribute(abuf); - if (!output) { - return {}; - } - target = std::move(*output); - } else if (const auto lmem = std::get_if(&*dest)) { - target = { - fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), - Type::Uint}; - } else if (const auto smem = std::get_if(&*dest)) { - ASSERT(stage == ShaderType::Compute); - target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; - } else if (const auto gmem = std::get_if(&*dest)) { - const std::string real = Visit(gmem->GetRealAddress()).AsUint(); - const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); - const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); - target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), - Type::Uint}; - } else if (const auto cv = std::get_if(&*dest)) { - target = {GetCustomVariable(cv->GetIndex()), Type::Float}; - } else { - UNREACHABLE_MSG("Assign called without a proper target"); - } - - code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType())); - return {}; - } - - template - Expression Add(Operation operation) { - return GenerateBinaryInfix(operation, "+", type, type, type); - } - - template - Expression Mul(Operation operation) { - return GenerateBinaryInfix(operation, "*", type, type, type); - } - - template - Expression Div(Operation operation) { - return GenerateBinaryInfix(operation, "/", type, type, type); - } - - template - Expression Fma(Operation operation) { - return GenerateTernary(operation, "fma", type, type, type, type); - } - - template - Expression 
Negate(Operation operation) { - return GenerateUnary(operation, "-", type, type); - } - - template - Expression Absolute(Operation operation) { - return GenerateUnary(operation, "abs", type, type); - } - - Expression FClamp(Operation operation) { - return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, - Type::Float); - } - - Expression FCastHalf0(Operation operation) { - return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression FCastHalf1(Operation operation) { - return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - template - Expression Min(Operation operation) { - return GenerateBinaryCall(operation, "min", type, type, type); - } - - template - Expression Max(Operation operation) { - return GenerateBinaryCall(operation, "max", type, type, type); - } - - Expression Select(Operation operation) { - const std::string condition = Visit(operation[0]).AsBool(); - const std::string true_case = Visit(operation[1]).AsUint(); - const std::string false_case = Visit(operation[2]).AsUint(); - std::string op_str = fmt::format("({} ? 
{} : {})", condition, true_case, false_case); - - return ApplyPrecise(operation, std::move(op_str), Type::Uint); - } - - Expression FCos(Operation operation) { - return GenerateUnary(operation, "cos", Type::Float, Type::Float); - } - - Expression FSin(Operation operation) { - return GenerateUnary(operation, "sin", Type::Float, Type::Float); - } - - Expression FExp2(Operation operation) { - return GenerateUnary(operation, "exp2", Type::Float, Type::Float); - } - - Expression FLog2(Operation operation) { - return GenerateUnary(operation, "log2", Type::Float, Type::Float); - } - - Expression FInverseSqrt(Operation operation) { - return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float); - } - - Expression FSqrt(Operation operation) { - return GenerateUnary(operation, "sqrt", Type::Float, Type::Float); - } - - Expression FRoundEven(Operation operation) { - return GenerateUnary(operation, "roundEven", Type::Float, Type::Float); - } - - Expression FFloor(Operation operation) { - return GenerateUnary(operation, "floor", Type::Float, Type::Float); - } - - Expression FCeil(Operation operation) { - return GenerateUnary(operation, "ceil", Type::Float, Type::Float); - } - - Expression FTrunc(Operation operation) { - return GenerateUnary(operation, "trunc", Type::Float, Type::Float); - } - - template - Expression FCastInteger(Operation operation) { - return GenerateUnary(operation, "float", Type::Float, type); - } - - Expression FSwizzleAdd(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsFloat(); - const std::string op_b = VisitOperand(operation, 1).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {fmt::format("{} + {}", op_a, op_b), Type::Float}; - } - - const std::string instr_mask = VisitOperand(operation, 2).AsUint(); - const std::string mask = code.GenerateTemporary(); - code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) 
<< 1)) & 3;", mask, - instr_mask); - - const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); - const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); - return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), - Type::Float}; - } - - Expression ICastFloat(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Float); - } - - Expression ICastUnsigned(Operation operation) { - return GenerateUnary(operation, "int", Type::Int, Type::Uint); - } - - template - Expression LogicalShiftLeft(Operation operation) { - return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); - } - - Expression ILogicalShiftRight(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - const std::string op_b = VisitOperand(operation, 1).AsUint(); - std::string op_str = fmt::format("int({} >> {})", op_a, op_b); - - return ApplyPrecise(operation, std::move(op_str), Type::Int); - } - - Expression IArithmeticShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); - } - - template - Expression BitwiseAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&", type, type, type); - } - - template - Expression BitwiseOr(Operation operation) { - return GenerateBinaryInfix(operation, "|", type, type, type); - } - - template - Expression BitwiseXor(Operation operation) { - return GenerateBinaryInfix(operation, "^", type, type, type); - } - - template - Expression BitwiseNot(Operation operation) { - return GenerateUnary(operation, "~", type, type); - } - - Expression UCastFloat(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Float); - } - - Expression UCastSigned(Operation operation) { - return GenerateUnary(operation, "uint", Type::Uint, Type::Int); - } - - Expression UShiftRight(Operation operation) { - return GenerateBinaryInfix(operation, ">>", Type::Uint, 
Type::Uint, Type::Uint); - } - - template - Expression BitfieldInsert(Operation operation) { - return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, - Type::Int); - } - - template - Expression BitfieldExtract(Operation operation) { - return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); - } - - template - Expression BitCount(Operation operation) { - return GenerateUnary(operation, "bitCount", type, type); - } - - template - Expression BitMSB(Operation operation) { - return GenerateUnary(operation, "findMSB", type, type); - } - - Expression HNegate(Operation operation) { - const auto GetNegate = [&](std::size_t index) { - return VisitOperand(operation, index).AsBool() + " ? -1 : 1"; - }; - return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(), - GetNegate(1), GetNegate(2)), - Type::HalfFloat}; - } - - Expression HClamp(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsHalfFloat(); - const std::string min = VisitOperand(operation, 1).AsFloat(); - const std::string max = VisitOperand(operation, 2).AsFloat(); - std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max); - - return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat); - } - - Expression HCastFloat(Operation operation) { - return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()), - Type::HalfFloat}; - } - - Expression HUnpack(Operation operation) { - Expression operand = VisitOperand(operation, 0); - switch (std::get(operation.GetMeta())) { - case Tegra::Shader::HalfType::H0_H1: - return operand; - case Tegra::Shader::HalfType::F32: - return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H0_H0: - return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat}; - case Tegra::Shader::HalfType::H1_H1: - return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), 
Type::HalfFloat}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression HMergeF32(Operation operation) { - return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float}; - } - - Expression HMergeH0(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), - Type::HalfFloat}; - } - - Expression HMergeH1(Operation operation) { - const std::string dest = VisitOperand(operation, 0).AsUint(); - const std::string src = VisitOperand(operation, 1).AsUint(); - return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), - Type::HalfFloat}; - } - - Expression HPack2(Operation operation) { - return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::HalfFloat}; - } - - template - Expression Comparison(Operation operation) { - static_assert(!unordered || type == Type::Float); - - Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type); - - if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) { - // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's - // and Nvidia's proprietary stacks. Manually force an ordered comparison. - return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - if constexpr (!unordered) { - return expr; - } - // Unordered comparisons are always true for NaN operands. 
- return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(), - VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FOrdered(Operation operation) { - return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression FUnordered(Operation operation) { - return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(), - VisitOperand(operation, 1).AsFloat()), - Type::Bool}; - } - - Expression LogicalAddCarry(Operation operation) { - const std::string carry = code.GenerateTemporary(); - code.AddLine("uint {};", carry); - code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(), - VisitOperand(operation, 1).AsUint(), carry); - return {fmt::format("({} != 0)", carry), Type::Bool}; - } - - Expression LogicalAssign(Operation operation) { - const Node& dest = operation[0]; - const Node& src = operation[1]; - - std::string target; - - if (const auto pred = std::get_if(&*dest)) { - ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); - - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - case Tegra::Shader::Pred::UnusedIndex: - // Writing to these predicates is a no-op - return {}; - } - target = GetPredicate(index); - } else if (const auto flag = std::get_if(&*dest)) { - target = GetInternalFlag(flag->GetFlag()); - } - - code.AddLine("{} = {};", target, Visit(src).AsBool()); - return {}; - } - - Expression LogicalAnd(Operation operation) { - return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalOr(Operation operation) { - return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); - } - - Expression LogicalXor(Operation operation) { - return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); - } - - Expression 
LogicalNegate(Operation operation) { - return GenerateUnary(operation, "!", Type::Bool, Type::Bool); - } - - Expression LogicalPick2(Operation operation) { - return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(), - VisitOperand(operation, 1).AsUint()), - Type::Bool}; - } - - Expression LogicalAnd2(Operation operation) { - return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); - } - - template - Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) { - Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2, - Type::HalfFloat, Type::HalfFloat); - if constexpr (!with_nan) { - return comparison; - } - return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(), - VisitOperand(operation, 0).AsHalfFloat(), - VisitOperand(operation, 1).AsHalfFloat()), - Type::Bool2}; - } - - template - Expression Logical2HLessThan(Operation operation) { - return GenerateHalfComparison(operation, "lessThan"); - } - - template - Expression Logical2HEqual(Operation operation) { - return GenerateHalfComparison(operation, "equal"); - } - - template - Expression Logical2HLessEqual(Operation operation) { - return GenerateHalfComparison(operation, "lessThanEqual"); - } - - template - Expression Logical2HGreaterThan(Operation operation) { - return GenerateHalfComparison(operation, "greaterThan"); - } - - template - Expression Logical2HNotEqual(Operation operation) { - return GenerateHalfComparison(operation, "notEqual"); - } - - template - Expression Logical2HGreaterEqual(Operation operation) { - return GenerateHalfComparison(operation, "greaterThanEqual"); - } - - Expression Texture(Operation operation) { - const auto meta = std::get(operation.GetMeta()); - const bool separate_dc = meta.sampler.type == TextureType::TextureCube && - meta.sampler.is_array && meta.sampler.is_shadow; - // TODO: Replace this with an array and make GenerateTexture use C++20 std::span - const std::vector extras{ - 
TextureOffset{}, - TextureArgument{Type::Float, meta.bias}, - }; - std::string expr = GenerateTexture(operation, "", extras, separate_dc); - if (meta.sampler.is_shadow) { - expr = fmt::format("vec4({})", expr); - } - return {expr + GetSwizzle(meta.element), Type::Float}; - } - - Expression TextureLod(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - std::string expr{}; - - if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && - ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || - meta->sampler.type == TextureType::TextureCube)) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); - expr = GenerateTexture(operation, "Lod", {}); - } else { - expr = GenerateTexture(operation, "Lod", - {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); - } - - if (meta->sampler.is_shadow) { - expr = "vec4(" + expr + ')'; - } - return {expr + GetSwizzle(meta->element), Type::Float}; - } - - Expression TextureGather(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - - const auto type = meta.sampler.is_shadow ? 
Type::Float : Type::Int; - const bool separate_dc = meta.sampler.is_shadow; - - std::vector ir_; - if (meta.sampler.is_shadow) { - ir_ = {TextureOffset{}}; - } else { - ir_ = {TextureOffset{}, TextureArgument{type, meta.component}}; - } - return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element), - Type::Float}; - } - - Expression TextureQueryDimensions(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - const std::string sampler = GetSampler(meta->sampler); - const std::string lod = VisitOperand(operation, 0).AsInt(); - - switch (meta->element) { - case 0: - case 1: - return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)), - Type::Int}; - case 3: - return {fmt::format("textureQueryLevels({})", sampler), Type::Int}; - } - UNREACHABLE(); - return {"0", Type::Int}; - } - - Expression TextureQueryLod(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - - if (meta->element < 2) { - return {fmt::format("int(({} * vec2(256)){})", - GenerateTexture(operation, "QueryLod", {}), - GetSwizzle(meta->element)), - Type::Int}; - } - return {"0", Type::Int}; - } - - Expression TexelFetch(Operation operation) { - constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"}; - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); - UNIMPLEMENTED_IF(meta->sampler.is_array); - const std::size_t count = operation.GetOperandsCount(); - - std::string expr = "texelFetch("; - expr += GetSampler(meta->sampler); - expr += ", "; - - expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 
1 : 0) - 1); - expr += '('; - for (std::size_t i = 0; i < count; ++i) { - if (i > 0) { - expr += ", "; - } - expr += VisitOperand(operation, i).AsInt(); - } - if (meta->array) { - expr += ", "; - expr += Visit(meta->array).AsInt(); - } - expr += ')'; - - if (meta->lod && !meta->sampler.is_buffer) { - expr += ", "; - expr += Visit(meta->lod).AsInt(); - } - expr += ')'; - expr += GetSwizzle(meta->element); - - return {std::move(expr), Type::Float}; - } - - Expression TextureGradient(Operation operation) { - const auto& meta = std::get(operation.GetMeta()); - std::string expr = - GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); - return {std::move(expr) + GetSwizzle(meta.element), Type::Float}; - } - - Expression ImageLoad(Operation operation) { - if (!device.HasImageLoadFormatted()) { - LOG_ERROR(Render_OpenGL, - "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); - return {"0", Type::Int}; - } - - const auto& meta{std::get(operation.GetMeta())}; - return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), - BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), - Type::Uint}; - } - - Expression ImageStore(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), - BuildIntegerCoordinates(operation), BuildImageValues(operation)); - return {}; - } - - template - Expression AtomicImage(Operation operation) { - const auto& meta{std::get(operation.GetMeta())}; - ASSERT(meta.values.size() == 1); - - return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), - BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()), - Type::Uint}; - } - - template - Expression Atomic(Operation operation) { - if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { - UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); - return {}; - } - return {fmt::format("atomic{}({}, {})", opname, 
Visit(operation[0]).GetCode(), - Visit(operation[1]).AsUint()), - Type::Uint}; - } - - template - Expression Reduce(Operation operation) { - code.AddLine("{};", Atomic(operation).GetCode()); - return {}; - } - - Expression Branch(Operation operation) { - const auto target = std::get_if(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("jmp_to = 0x{:X}U;", target->GetValue()); - code.AddLine("break;"); - return {}; - } - - Expression BranchIndirect(Operation operation) { - const std::string op_a = VisitOperand(operation, 0).AsUint(); - - code.AddLine("jmp_to = {};", op_a); - code.AddLine("break;"); - return {}; - } - - Expression PushFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - const auto target = std::get_if(&*operation[0]); - UNIMPLEMENTED_IF(!target); - - code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack), - target->GetValue()); - return {}; - } - - Expression PopFlowStack(Operation operation) { - const auto stack = std::get(operation.GetMeta()); - code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); - code.AddLine("break;"); - return {}; - } - - void PreExit() { - if (stage != ShaderType::Fragment) { - return; - } - const auto& used_registers = ir.GetRegisters(); - const auto SafeGetRegister = [&](u32 reg) -> Expression { - // TODO(Rodrigo): Replace with contains once C++20 releases - if (used_registers.find(reg) != used_registers.end()) { - return {GetRegister(reg), Type::Float}; - } - return {"0.0f", Type::Float}; - }; - - UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - - // Write the color outputs using the data in the shader registers, disabled - // rendertargets/components are skipped in the register assignment. 
- u32 current_reg = 0; - for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { - // TODO(Subv): Figure out how dual-source blending is configured in the Switch. - for (u32 component = 0; component < 4; ++component) { - if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), - SafeGetRegister(current_reg).AsFloat()); - ++current_reg; - } - } - } - if (header.ps.omap.depth) { - // The depth output is always 2 registers after the last color output, and current_reg - // already contains one past the last color register. - code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat()); - } - } - - Expression Exit(Operation operation) { - PreExit(); - code.AddLine("return;"); - return {}; - } - - Expression Discard(Operation operation) { - // Enclose "discard" in a conditional, so that GLSL compilation does not complain - // about unexecuted instructions that may follow this. 
- code.AddLine("if (true) {{"); - ++code.scope; - code.AddLine("discard;"); - --code.scope; - code.AddLine("}}"); - return {}; - } - - Expression EmitVertex(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EmitVertex is expected to be used in a geometry shader."); - code.AddLine("EmitVertex();"); - return {}; - } - - Expression EndPrimitive(Operation operation) { - ASSERT_MSG(stage == ShaderType::Geometry, - "EndPrimitive is expected to be used in a geometry shader."); - code.AddLine("EndPrimitive();"); - return {}; - } - - Expression InvocationId(Operation operation) { - return {"gl_InvocationID", Type::Int}; - } - - Expression YNegate(Operation operation) { - // Y_NEGATE is mapped to this uniform value - return {"gl_FrontMaterial.ambient.a", Type::Float}; - } - - template - Expression LocalInvocationId(Operation) { - return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint}; - } - - template - Expression WorkGroupId(Operation) { - return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint}; - } - - Expression BallotThread(Operation operation) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub on non-Nvidia devices by simulating all threads voting the same as the active - // one. - return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; - } - return {fmt::format("ballotThreadNV({})", value), Type::Uint}; - } - - Expression Vote(Operation operation, const char* func) { - const std::string value = VisitOperand(operation, 0).AsBool(); - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // Stub with a warp size of one. 
- return {value, Type::Bool}; - } - return {fmt::format("{}({})", func, value), Type::Bool}; - } - - Expression VoteAll(Operation operation) { - return Vote(operation, "allThreadsNV"); - } - - Expression VoteAny(Operation operation) { - return Vote(operation, "anyThreadNV"); - } - - Expression VoteEqual(Operation operation) { - if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); - // We must return true here since a stub for a theoretical warp size of 1. - // This will always return an equal result across all votes. - return {"true", Type::Bool}; - } - return Vote(operation, "allThreadsEqualNV"); - } - - Expression ThreadId(Operation operation) { - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {"0U", Type::Uint}; - } - return {"gl_SubGroupInvocationARB", Type::Uint}; - } - - template - Expression ThreadMask(Operation) { - if (device.HasWarpIntrinsics()) { - return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; - } - if (device.HasShaderBallot()) { - return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; - } - LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); - return {"0U", Type::Uint}; - } - - Expression ShuffleIndexed(Operation operation) { - std::string value = VisitOperand(operation, 0).AsFloat(); - - if (!device.HasShaderBallot()) { - LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); - return {std::move(value), Type::Float}; - } - - const std::string index = VisitOperand(operation, 1).AsUint(); - return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; - } - - Expression Barrier(Operation) { - if (!ir.IsDecompiled()) { - LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); - return {}; - } - code.AddLine("barrier();"); - return {}; - } - - Expression 
MemoryBarrierGroup(Operation) { - code.AddLine("groupMemoryBarrier();"); - return {}; - } - - Expression MemoryBarrierGlobal(Operation) { - code.AddLine("memoryBarrier();"); - return {}; - } - - struct Func final { - Func() = delete; - ~Func() = delete; - - static constexpr std::string_view LessThan = "<"; - static constexpr std::string_view Equal = "=="; - static constexpr std::string_view LessEqual = "<="; - static constexpr std::string_view GreaterThan = ">"; - static constexpr std::string_view NotEqual = "!="; - static constexpr std::string_view GreaterEqual = ">="; - - static constexpr std::string_view Eq = "Eq"; - static constexpr std::string_view Ge = "Ge"; - static constexpr std::string_view Gt = "Gt"; - static constexpr std::string_view Le = "Le"; - static constexpr std::string_view Lt = "Lt"; - - static constexpr std::string_view Add = "Add"; - static constexpr std::string_view Min = "Min"; - static constexpr std::string_view Max = "Max"; - static constexpr std::string_view And = "And"; - static constexpr std::string_view Or = "Or"; - static constexpr std::string_view Xor = "Xor"; - static constexpr std::string_view Exchange = "Exchange"; - }; - - static constexpr std::array operation_decompilers = { - &GLSLDecompiler::Assign, - - &GLSLDecompiler::Select, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Fma, - &GLSLDecompiler::Negate, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::FClamp, - &GLSLDecompiler::FCastHalf0, - &GLSLDecompiler::FCastHalf1, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - &GLSLDecompiler::FCos, - &GLSLDecompiler::FSin, - &GLSLDecompiler::FExp2, - &GLSLDecompiler::FLog2, - &GLSLDecompiler::FInverseSqrt, - &GLSLDecompiler::FSqrt, - &GLSLDecompiler::FRoundEven, - &GLSLDecompiler::FFloor, - &GLSLDecompiler::FCeil, - &GLSLDecompiler::FTrunc, - &GLSLDecompiler::FCastInteger, - &GLSLDecompiler::FCastInteger, - &GLSLDecompiler::FSwizzleAdd, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - 
&GLSLDecompiler::Div, - &GLSLDecompiler::Negate, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - - &GLSLDecompiler::ICastFloat, - &GLSLDecompiler::ICastUnsigned, - &GLSLDecompiler::LogicalShiftLeft, - &GLSLDecompiler::ILogicalShiftRight, - &GLSLDecompiler::IArithmeticShiftRight, - &GLSLDecompiler::BitwiseAnd, - &GLSLDecompiler::BitwiseOr, - &GLSLDecompiler::BitwiseXor, - &GLSLDecompiler::BitwiseNot, - &GLSLDecompiler::BitfieldInsert, - &GLSLDecompiler::BitfieldExtract, - &GLSLDecompiler::BitCount, - &GLSLDecompiler::BitMSB, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Div, - &GLSLDecompiler::Min, - &GLSLDecompiler::Max, - &GLSLDecompiler::UCastFloat, - &GLSLDecompiler::UCastSigned, - &GLSLDecompiler::LogicalShiftLeft, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::UShiftRight, - &GLSLDecompiler::BitwiseAnd, - &GLSLDecompiler::BitwiseOr, - &GLSLDecompiler::BitwiseXor, - &GLSLDecompiler::BitwiseNot, - &GLSLDecompiler::BitfieldInsert, - &GLSLDecompiler::BitfieldExtract, - &GLSLDecompiler::BitCount, - &GLSLDecompiler::BitMSB, - - &GLSLDecompiler::Add, - &GLSLDecompiler::Mul, - &GLSLDecompiler::Fma, - &GLSLDecompiler::Absolute, - &GLSLDecompiler::HNegate, - &GLSLDecompiler::HClamp, - &GLSLDecompiler::HCastFloat, - &GLSLDecompiler::HUnpack, - &GLSLDecompiler::HMergeF32, - &GLSLDecompiler::HMergeH0, - &GLSLDecompiler::HMergeH1, - &GLSLDecompiler::HPack2, - - &GLSLDecompiler::LogicalAssign, - &GLSLDecompiler::LogicalAnd, - &GLSLDecompiler::LogicalOr, - &GLSLDecompiler::LogicalXor, - &GLSLDecompiler::LogicalNegate, - &GLSLDecompiler::LogicalPick2, - &GLSLDecompiler::LogicalAnd2, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::FOrdered, - &GLSLDecompiler::FUnordered, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - 
&GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - &GLSLDecompiler::Comparison, - - &GLSLDecompiler::LogicalAddCarry, - - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, - &GLSLDecompiler::Logical2HLessThan, - &GLSLDecompiler::Logical2HEqual, - &GLSLDecompiler::Logical2HLessEqual, - &GLSLDecompiler::Logical2HGreaterThan, - &GLSLDecompiler::Logical2HNotEqual, - &GLSLDecompiler::Logical2HGreaterEqual, - - &GLSLDecompiler::Texture, - &GLSLDecompiler::TextureLod, - &GLSLDecompiler::TextureGather, - &GLSLDecompiler::TextureQueryDimensions, - &GLSLDecompiler::TextureQueryLod, - &GLSLDecompiler::TexelFetch, - &GLSLDecompiler::TextureGradient, - - &GLSLDecompiler::ImageLoad, - &GLSLDecompiler::ImageStore, - - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - &GLSLDecompiler::AtomicImage, - - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - &GLSLDecompiler::Atomic, - - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - - 
&GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - &GLSLDecompiler::Reduce, - - &GLSLDecompiler::Branch, - &GLSLDecompiler::BranchIndirect, - &GLSLDecompiler::PushFlowStack, - &GLSLDecompiler::PopFlowStack, - &GLSLDecompiler::Exit, - &GLSLDecompiler::Discard, - - &GLSLDecompiler::EmitVertex, - &GLSLDecompiler::EndPrimitive, - - &GLSLDecompiler::InvocationId, - &GLSLDecompiler::YNegate, - &GLSLDecompiler::LocalInvocationId<0>, - &GLSLDecompiler::LocalInvocationId<1>, - &GLSLDecompiler::LocalInvocationId<2>, - &GLSLDecompiler::WorkGroupId<0>, - &GLSLDecompiler::WorkGroupId<1>, - &GLSLDecompiler::WorkGroupId<2>, - - &GLSLDecompiler::BallotThread, - &GLSLDecompiler::VoteAll, - &GLSLDecompiler::VoteAny, - &GLSLDecompiler::VoteEqual, - - &GLSLDecompiler::ThreadId, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ThreadMask, - &GLSLDecompiler::ShuffleIndexed, - - &GLSLDecompiler::Barrier, - &GLSLDecompiler::MemoryBarrierGroup, - &GLSLDecompiler::MemoryBarrierGlobal, - }; - static_assert(operation_decompilers.size() == static_cast(OperationCode::Amount)); - - std::string GetRegister(u32 index) const { - return AppendSuffix(index, "gpr"); - } - - std::string GetCustomVariable(u32 index) const { - return AppendSuffix(index, "custom_var"); - } - - std::string GetPredicate(Tegra::Shader::Pred pred) const { - return AppendSuffix(static_cast(pred), "pred"); - } - - std::string GetGenericInputAttribute(Attribute::Index attribute) const { - return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); - } - - std::unordered_map varying_description; - - std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { - const u8 offset = static_cast(GetGenericAttributeIndex(attribute) * 4 + element); - const auto& description = varying_description.at(offset); - 
if (description.is_scalar) { - return description.name; - } - return fmt::format("{}[{}]", description.name, element - description.first_element); - } - - std::string GetConstBuffer(u32 index) const { - return AppendSuffix(index, "cbuf"); - } - - std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); - } - - std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { - return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, - suffix); - } - - std::string GetConstBufferBlock(u32 index) const { - return AppendSuffix(index, "cbuf_block"); - } - - std::string GetLocalMemory() const { - if (suffix.empty()) { - return "lmem"; - } else { - return "lmem_" + std::string{suffix}; - } - } - - std::string GetInternalFlag(InternalFlag flag) const { - constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", - "overflow_flag"}; - const auto index = static_cast(flag); - ASSERT(index < static_cast(InternalFlag::Amount)); - - if (suffix.empty()) { - return InternalFlagNames[index]; - } else { - return fmt::format("{}_{}", InternalFlagNames[index], suffix); - } - } - - std::string GetSampler(const SamplerEntry& sampler) const { - return AppendSuffix(sampler.index, "sampler"); - } - - std::string GetImage(const ImageEntry& image) const { - return AppendSuffix(image.index, "image"); - } - - std::string AppendSuffix(u32 index, std::string_view name) const { - if (suffix.empty()) { - return fmt::format("{}{}", name, index); - } else { - return fmt::format("{}{}_{}", name, index, suffix); - } - } - - u32 GetNumPhysicalInputAttributes() const { - return stage == ShaderType::Vertex ? 
GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); - } - - u32 GetNumPhysicalAttributes() const { - return std::min(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); - } - - u32 GetNumPhysicalVaryings() const { - return std::min(device.GetMaxVaryings(), Maxwell::NumVaryings); - } - - const Device& device; - const ShaderIR& ir; - const Registry& registry; - const ShaderType stage; - const std::string_view identifier; - const std::string_view suffix; - const Header header; - std::unordered_map transform_feedback; - - ShaderWriter code; - - std::optional max_input_vertices; -}; - -std::string GetFlowVariable(u32 index) { - return fmt::format("flow_var{}", index); -} - -class ExprDecompiler { -public: - explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ExprAnd& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " && "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprOr& expr) { - inner += '('; - std::visit(*this, *expr.operand1); - inner += " || "; - std::visit(*this, *expr.operand2); - inner += ')'; - } - - void operator()(const ExprNot& expr) { - inner += '!'; - std::visit(*this, *expr.operand1); - } - - void operator()(const ExprPredicate& expr) { - const auto pred = static_cast(expr.predicate); - inner += decomp.GetPredicate(pred); - } - - void operator()(const ExprCondCode& expr) { - inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); - } - - void operator()(const ExprVar& expr) { - inner += GetFlowVariable(expr.var_index); - } - - void operator()(const ExprBoolean& expr) { - inner += expr.value ? 
"true" : "false"; - } - - void operator()(VideoCommon::Shader::ExprGprEqual& expr) { - inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); - } - - const std::string& GetResult() const { - return inner; - } - -private: - GLSLDecompiler& decomp; - std::string inner; -}; - -class ASTDecompiler { -public: - explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {} - - void operator()(const ASTProgram& ast) { - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - } - - void operator()(const ASTIfThen& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()(const ASTIfElse& ast) { - decomp.code.AddLine("else {{"); - decomp.code.scope++; - ASTNode current = ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - - void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { - UNREACHABLE(); - } - - void operator()(const ASTBlockDecoded& ast) { - decomp.VisitBlock(ast.nodes); - } - - void operator()(const ASTVarSet& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); - } - - void operator()(const ASTLabel& ast) { - decomp.code.AddLine("// Label_{}:", ast.index); - } - - void operator()([[maybe_unused]] const ASTGoto& ast) { - UNREACHABLE(); - } - - void operator()(const ASTDoWhile& ast) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("do {{"); - decomp.code.scope++; - ASTNode current = 
ast.nodes.GetFirst(); - while (current) { - Visit(current); - current = current->GetNext(); - } - decomp.code.scope--; - decomp.code.AddLine("}} while({});", expr_parser.GetResult()); - } - - void operator()(const ASTReturn& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - if (ast.kills) { - decomp.code.AddLine("discard;"); - } else { - decomp.PreExit(); - decomp.code.AddLine("return;"); - } - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void operator()(const ASTBreak& ast) { - const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); - if (!is_true) { - ExprDecompiler expr_parser{decomp}; - std::visit(expr_parser, *ast.condition); - decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); - decomp.code.scope++; - } - decomp.code.AddLine("break;"); - if (!is_true) { - decomp.code.scope--; - decomp.code.AddLine("}}"); - } - } - - void Visit(const ASTNode& node) { - std::visit(*this, *node->GetInnerData()); - } - -private: - GLSLDecompiler& decomp; -}; - -void GLSLDecompiler::DecompileAST() { - const u32 num_flow_variables = ir.GetASTNumVariables(); - for (u32 i = 0; i < num_flow_variables; i++) { - code.AddLine("bool {} = false;", GetFlowVariable(i)); - } - - ASTDecompiler decompiler{*this}; - decompiler.Visit(ir.GetASTProgram()); -} - -} // Anonymous namespace - -ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { - ShaderEntries entries; - for (const auto& cbuf : ir.GetConstantBuffers()) { - entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), - cbuf.first); - } - for (const auto& [base, usage] : ir.GetGlobalMemory()) { - entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, - 
usage.is_written); - } - for (const auto& sampler : ir.GetSamplers()) { - entries.samplers.emplace_back(sampler); - } - for (const auto& image : ir.GetImages()) { - entries.images.emplace_back(image); - } - const auto clip_distances = ir.GetClipDistances(); - for (std::size_t i = 0; i < std::size(clip_distances); ++i) { - entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; - } - for (const auto& buffer : entries.const_buffers) { - entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); - } - entries.shader_length = ir.GetLength(); - return entries; -} - -std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, - ShaderType stage, std::string_view identifier, - std::string_view suffix) { - GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); - decompiler.Decompile(); - return decompiler.GetResult(); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h deleted file mode 100644 index 0397a000c..000000000 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" -#include "video_core/shader/shader_ir.h" - -namespace OpenGL { - -class Device; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using SamplerEntry = VideoCommon::Shader::SamplerEntry; -using ImageEntry = VideoCommon::Shader::ImageEntry; - -class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { -public: - explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_) - : ConstBuffer{max_offset_, is_indirect_}, index{index_} {} - - u32 GetIndex() const { - return index; - } - -private: - u32 index = 0; -}; - -struct GlobalMemoryEntry { - constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_, - bool is_written_) - : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{ - is_written_} {} - - u32 cbuf_index = 0; - u32 cbuf_offset = 0; - bool is_read = false; - bool is_written = false; -}; - -struct ShaderEntries { - std::vector const_buffers; - std::vector global_memory_entries; - std::vector samplers; - std::vector images; - std::size_t shader_length{}; - u32 clip_distances{}; - u32 enabled_uniform_buffers{}; -}; - -ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage); - -std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - const VideoCommon::Shader::Registry& registry, - Tegra::Engines::ShaderType stage, std::string_view identifier, - std::string_view suffix = {}); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp deleted file mode 100644 index 0deb86517..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ /dev/null @@ 
-1,482 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/fs/file.h" -#include "common/fs/fs.h" -#include "common/fs/path_util.h" -#include "common/logging/log.h" -#include "common/scm_rev.h" -#include "common/settings.h" -#include "common/zstd_compression.h" -#include "core/core.h" -#include "core/hle/kernel/k_process.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_disk_cache.h" - -namespace OpenGL { - -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::BindlessSamplerMap; -using VideoCommon::Shader::BoundSamplerMap; -using VideoCommon::Shader::KeyMap; -using VideoCommon::Shader::SeparateSamplerKey; -using ShaderCacheVersionHash = std::array; - -struct ConstBufferKey { - u32 cbuf = 0; - u32 offset = 0; - u32 value = 0; -}; - -struct BoundSamplerEntry { - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct SeparateSamplerEntry { - u32 cbuf1 = 0; - u32 cbuf2 = 0; - u32 offset1 = 0; - u32 offset2 = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -struct BindlessSamplerEntry { - u32 cbuf = 0; - u32 offset = 0; - Tegra::Engines::SamplerDescriptor sampler; -}; - -namespace { - -constexpr u32 NativeVersion = 21; - -ShaderCacheVersionHash GetShaderCacheVersionHash() { - ShaderCacheVersionHash hash{}; - const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size()); - std::memcpy(hash.data(), Common::g_shader_cache_version, length); - return hash; -} - -} // Anonymous namespace - -ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; - -ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; - -bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { - if (!file.ReadObject(type)) { - return false; - } - 
u32 code_size; - u32 code_size_b; - if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) { - return false; - } - code.resize(code_size); - code_b.resize(code_size_b); - if (file.Read(code) != code_size) { - return false; - } - if (HasProgramA() && file.Read(code_b) != code_size_b) { - return false; - } - - u8 is_texture_handler_size_known; - u32 texture_handler_size_value; - u32 num_keys; - u32 num_bound_samplers; - u32 num_separate_samplers; - u32 num_bindless_samplers; - if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) || - !file.ReadObject(is_texture_handler_size_known) || - !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) || - !file.ReadObject(compute_info) || !file.ReadObject(num_keys) || - !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) || - !file.ReadObject(num_bindless_samplers)) { - return false; - } - if (is_texture_handler_size_known) { - texture_handler_size = texture_handler_size_value; - } - - std::vector flat_keys(num_keys); - std::vector flat_bound_samplers(num_bound_samplers); - std::vector flat_separate_samplers(num_separate_samplers); - std::vector flat_bindless_samplers(num_bindless_samplers); - if (file.Read(flat_keys) != flat_keys.size() || - file.Read(flat_bound_samplers) != flat_bound_samplers.size() || - file.Read(flat_separate_samplers) != flat_separate_samplers.size() || - file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) { - return false; - } - for (const auto& entry : flat_keys) { - keys.insert({{entry.cbuf, entry.offset}, entry.value}); - } - for (const auto& entry : flat_bound_samplers) { - bound_samplers.emplace(entry.offset, entry.sampler); - } - for (const auto& entry : flat_separate_samplers) { - SeparateSamplerKey key; - key.buffers = {entry.cbuf1, entry.cbuf2}; - key.offsets = {entry.offset1, entry.offset2}; - separate_samplers.emplace(key, entry.sampler); - } - for (const auto& entry : flat_bindless_samplers) { 
- bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); - } - - return true; -} - -bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { - if (!file.WriteObject(static_cast(type)) || - !file.WriteObject(static_cast(code.size())) || - !file.WriteObject(static_cast(code_b.size()))) { - return false; - } - if (file.Write(code) != code.size()) { - return false; - } - if (HasProgramA() && file.Write(code_b) != code_b.size()) { - return false; - } - - if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) || - !file.WriteObject(static_cast(texture_handler_size.has_value())) || - !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) || - !file.WriteObject(compute_info) || !file.WriteObject(static_cast(keys.size())) || - !file.WriteObject(static_cast(bound_samplers.size())) || - !file.WriteObject(static_cast(separate_samplers.size())) || - !file.WriteObject(static_cast(bindless_samplers.size()))) { - return false; - } - - std::vector flat_keys; - flat_keys.reserve(keys.size()); - for (const auto& [address, value] : keys) { - flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); - } - - std::vector flat_bound_samplers; - flat_bound_samplers.reserve(bound_samplers.size()); - for (const auto& [address, sampler] : bound_samplers) { - flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); - } - - std::vector flat_separate_samplers; - flat_separate_samplers.reserve(separate_samplers.size()); - for (const auto& [key, sampler] : separate_samplers) { - SeparateSamplerEntry entry; - std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; - std::tie(entry.offset1, entry.offset2) = key.offsets; - entry.sampler = sampler; - flat_separate_samplers.push_back(entry); - } - - std::vector flat_bindless_samplers; - flat_bindless_samplers.reserve(bindless_samplers.size()); - for (const auto& [address, sampler] : bindless_samplers) { - flat_bindless_samplers.push_back( - 
BindlessSamplerEntry{address.first, address.second, sampler}); - } - - return file.Write(flat_keys) == flat_keys.size() && - file.Write(flat_bound_samplers) == flat_bound_samplers.size() && - file.Write(flat_separate_samplers) == flat_separate_samplers.size() && - file.Write(flat_bindless_samplers) == flat_bindless_samplers.size(); -} - -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; - -ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; - -void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { - title_id = title_id_; -} - -std::optional> ShaderDiskCacheOpenGL::LoadTransferable() { - // Skip games without title id - const bool has_title_id = title_id != 0; - if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { - return std::nullopt; - } - - Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No transferable shader cache found"); - is_usable = true; - return std::nullopt; - } - - u32 version{}; - if (!file.ReadObject(version)) { - LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); - return std::nullopt; - } - - if (version < NativeVersion) { - LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); - file.Close(); - InvalidateTransferable(); - is_usable = true; - return std::nullopt; - } - if (version > NativeVersion) { - LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " - "of the emulator, skipping"); - return std::nullopt; - } - - // Version is valid, load the shaders - std::vector entries; - while (static_cast(file.Tell()) < file.GetSize()) { - ShaderDiskCacheEntry& entry = entries.emplace_back(); - if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); - return std::nullopt; - } - } - - is_usable = true; - return {std::move(entries)}; -} - -std::vector 
ShaderDiskCacheOpenGL::LoadPrecompiled() { - if (!is_usable) { - return {}; - } - - Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); - return {}; - } - - if (const auto result = LoadPrecompiledFile(file)) { - return *result; - } - - LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); - file.Close(); - InvalidatePrecompiled(); - return {}; -} - -std::optional> ShaderDiskCacheOpenGL::LoadPrecompiledFile( - Common::FS::IOFile& file) { - // Read compressed file from disk and decompress to virtual precompiled cache file - std::vector compressed(file.GetSize()); - if (file.Read(compressed) != file.GetSize()) { - return std::nullopt; - } - const std::vector decompressed = Common::Compression::DecompressDataZSTD(compressed); - SaveArrayToPrecompiled(decompressed.data(), decompressed.size()); - precompiled_cache_virtual_file_offset = 0; - - ShaderCacheVersionHash file_hash{}; - if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - if (GetShaderCacheVersionHash() != file_hash) { - LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); - precompiled_cache_virtual_file_offset = 0; - return std::nullopt; - } - - std::vector entries; - while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - u32 binary_size; - auto& entry = entries.emplace_back(); - if (!LoadObjectFromPrecompiled(entry.unique_identifier) || - !LoadObjectFromPrecompiled(entry.binary_format) || - !LoadObjectFromPrecompiled(binary_size)) { - return std::nullopt; - } - - entry.binary.resize(binary_size); - if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { - return std::nullopt; - } - } - return entries; -} - -void ShaderDiskCacheOpenGL::InvalidateTransferable() { - if 
(!Common::FS::RemoveFile(GetTransferablePath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", - Common::FS::PathToUTF8String(GetTransferablePath())); - } - InvalidatePrecompiled(); -} - -void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { - // Clear virtaul precompiled cache file - precompiled_cache_virtual_file.Resize(0); - - if (!Common::FS::RemoveFile(GetPrecompiledPath())) { - LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", - Common::FS::PathToUTF8String(GetPrecompiledPath())); - } -} - -void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { - if (!is_usable) { - return; - } - - const u64 id = entry.unique_identifier; - if (stored_transferable.contains(id)) { - // The shader already exists - return; - } - - Common::FS::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) { - return; - } - if (!entry.Save(file)) { - LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); - file.Close(); - InvalidateTransferable(); - return; - } - - stored_transferable.insert(id); -} - -void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { - if (!is_usable) { - return; - } - - // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header - // when writing the dump. This should be done the moment I get access to write to the virtual - // file. 
- if (precompiled_cache_virtual_file.GetSize() == 0) { - SavePrecompiledHeaderToVirtualPrecompiledCache(); - } - - GLint binary_length; - glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); - - GLenum binary_format; - std::vector binary(binary_length); - glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - - if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || - !SaveObjectToPrecompiled(static_cast(binary.size())) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", - unique_identifier); - InvalidatePrecompiled(); - } -} - -Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { - if (!EnsureDirectories()) { - return {}; - } - - const auto transferable_path{GetTransferablePath()}; - const bool existed = Common::FS::Exists(transferable_path); - - Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append, - Common::FS::FileType::BinaryFile}; - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - if (!existed || file.GetSize() == 0) { - // If the file didn't exist, write its version - if (!file.WriteObject(NativeVersion)) { - LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}", - Common::FS::PathToUTF8String(transferable_path)); - return {}; - } - } - return file; -} - -void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { - const auto hash{GetShaderCacheVersionHash()}; - if (!SaveArrayToPrecompiled(hash.data(), hash.size())) { - LOG_ERROR( - Render_OpenGL, - "Failed to write precompiled cache version hash to virtual precompiled cache file"); - } -} - -void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { - precompiled_cache_virtual_file_offset = 0; - const std::vector 
uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); - const std::vector compressed = - Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); - - const auto precompiled_path = GetPrecompiledPath(); - Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write, - Common::FS::FileType::BinaryFile}; - - if (!file.IsOpen()) { - LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - return; - } - if (file.Write(compressed) != compressed.size()) { - LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", - Common::FS::PathToUTF8String(precompiled_path)); - } -} - -bool ShaderDiskCacheOpenGL::EnsureDirectories() const { - const auto CreateDir = [](const std::filesystem::path& dir) { - if (!Common::FS::CreateDir(dir)) { - LOG_ERROR(Render_OpenGL, "Failed to create directory={}", - Common::FS::PathToUTF8String(dir)); - return false; - } - return true; - }; - - return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) && - CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && - CreateDir(GetPrecompiledDir()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const { - return GetTransferableDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const { - return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID()); -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const { - return GetBaseDir() / "transferable"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const { - return GetBaseDir() / "precompiled"; -} - -std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const { - return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl"; -} - -std::string ShaderDiskCacheOpenGL::GetTitleID() const { - return fmt::format("{:016X}", title_id); -} - -} // 
namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h deleted file mode 100644 index f8bc23868..000000000 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "core/file_sys/vfs_vector.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/registry.h" - -namespace Common::FS { -class IOFile; -} - -namespace OpenGL { - -using ProgramCode = std::vector; - -/// Describes a shader and how it's used by the guest GPU -struct ShaderDiskCacheEntry { - ShaderDiskCacheEntry(); - ~ShaderDiskCacheEntry(); - - bool Load(Common::FS::IOFile& file); - - bool Save(Common::FS::IOFile& file) const; - - bool HasProgramA() const { - return !code.empty() && !code_b.empty(); - } - - Tegra::Engines::ShaderType type{}; - ProgramCode code; - ProgramCode code_b; - - u64 unique_identifier = 0; - std::optional texture_handler_size; - u32 bound_buffer = 0; - VideoCommon::Shader::GraphicsInfo graphics_info; - VideoCommon::Shader::ComputeInfo compute_info; - VideoCommon::Shader::KeyMap keys; - VideoCommon::Shader::BoundSamplerMap bound_samplers; - VideoCommon::Shader::SeparateSamplerMap separate_samplers; - VideoCommon::Shader::BindlessSamplerMap bindless_samplers; -}; - -/// Contains an OpenGL dumped binary program -struct ShaderDiskCachePrecompiled { - u64 unique_identifier = 0; - GLenum binary_format = 0; - std::vector binary; -}; - -class ShaderDiskCacheOpenGL { -public: - explicit ShaderDiskCacheOpenGL(); - ~ShaderDiskCacheOpenGL(); - - /// Binds a title ID for all future operations. 
- void BindTitleID(u64 title_id); - - /// Loads transferable cache. If file has a old version or on failure, it deletes the file. - std::optional> LoadTransferable(); - - /// Loads current game's precompiled cache. Invalidates on failure. - std::vector LoadPrecompiled(); - - /// Removes the transferable (and precompiled) cache file. - void InvalidateTransferable(); - - /// Removes the precompiled cache file and clears virtual precompiled cache file. - void InvalidatePrecompiled(); - - /// Saves a raw dump to the transferable file. Checks for collisions. - void SaveEntry(const ShaderDiskCacheEntry& entry); - - /// Saves a dump entry to the precompiled file. Does not check for collisions. - void SavePrecompiled(u64 unique_identifier, GLuint program); - - /// Serializes virtual precompiled shader cache file to real file - void SaveVirtualPrecompiledFile(); - -private: - /// Loads the transferable cache. Returns empty on failure. - std::optional> LoadPrecompiledFile( - Common::FS::IOFile& file); - - /// Opens current game's transferable file and write it's header if it doesn't exist - Common::FS::IOFile AppendTransferableFile() const; - - /// Save precompiled header to precompiled_cache_in_memory - void SavePrecompiledHeaderToVirtualPrecompiledCache(); - - /// Create shader disk cache directories. Returns true on success. 
- bool EnsureDirectories() const; - - /// Gets current game's transferable file path - std::filesystem::path GetTransferablePath() const; - - /// Gets current game's precompiled file path - std::filesystem::path GetPrecompiledPath() const; - - /// Get user's transferable directory path - std::filesystem::path GetTransferableDir() const; - - /// Get user's precompiled directory path - std::filesystem::path GetPrecompiledDir() const; - - /// Get user's shader directory path - std::filesystem::path GetBaseDir() const; - - /// Get current game's title id - std::string GetTitleID() const; - - template - bool SaveArrayToPrecompiled(const T* data, std::size_t length) { - const std::size_t write_length = precompiled_cache_virtual_file.WriteArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += write_length; - return write_length == sizeof(T) * length; - } - - template - bool LoadArrayFromPrecompiled(T* data, std::size_t length) { - const std::size_t read_length = precompiled_cache_virtual_file.ReadArray( - data, length, precompiled_cache_virtual_file_offset); - precompiled_cache_virtual_file_offset += read_length; - return read_length == sizeof(T) * length; - } - - template - bool SaveObjectToPrecompiled(const T& object) { - return SaveArrayToPrecompiled(&object, 1); - } - - bool SaveObjectToPrecompiled(bool object) { - const auto value = static_cast(object); - return SaveArrayToPrecompiled(&value, 1); - } - - template - bool LoadObjectFromPrecompiled(T& object) { - return LoadArrayFromPrecompiled(&object, 1); - } - - // Stores whole precompiled cache which will be read from or saved to the precompiled chache - // file - FileSys::VectorVfsFile precompiled_cache_virtual_file; - // Stores the current offset of the precompiled cache file for IO purposes - std::size_t precompiled_cache_virtual_file_offset = 0; - - // Stored transferable shaders - std::unordered_set stored_transferable; - - /// Title ID to operate on - u64 
title_id = 0; - - // The cache has been loaded at boot - bool is_usable = false; -}; - -} // namespace OpenGL -- cgit v1.2.3 From e9a91bc5cc2c39b476ba8946f66930f5ab5608b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 6 Apr 2021 20:14:55 -0300 Subject: shader: Interact texture buffers with buffer cache --- .../backend/spirv/emit_context.cpp | 54 ++++---- src/shader_recompiler/backend/spirv/emit_context.h | 2 +- src/shader_recompiler/shader_info.h | 2 +- src/video_core/buffer_cache/buffer_cache.h | 138 +++++++++++++++++++++ src/video_core/renderer_opengl/gl_buffer_cache.h | 1 + .../renderer_opengl/gl_texture_cache.cpp | 4 + src/video_core/renderer_opengl/gl_texture_cache.h | 2 + src/video_core/renderer_vulkan/pipeline_helper.h | 26 ++-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 57 ++++++--- src/video_core/renderer_vulkan/vk_buffer_cache.h | 18 +++ .../renderer_vulkan/vk_compute_pipeline.cpp | 30 +++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 31 +++-- .../renderer_vulkan/vk_texture_cache.cpp | 63 ++-------- src/video_core/renderer_vulkan/vk_texture_cache.h | 30 ++--- src/video_core/texture_cache/image_view_base.cpp | 9 ++ src/video_core/texture_cache/image_view_base.h | 1 + src/video_core/texture_cache/texture_cache.h | 13 +- 17 files changed, 333 insertions(+), 148 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index d01633628..b738e00cc 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -130,8 +130,8 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin DefineSharedMemory(program); DefineConstantBuffers(program.info, binding); DefineStorageBuffers(program.info, binding); - DefineTextures(program.info, binding); DefineTextureBuffers(program.info, binding); + DefineTextures(program.info, binding); 
DefineAttributeMemAccess(program.info); DefineLabels(program); } @@ -516,6 +516,32 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { } } +void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { + if (info.texture_buffer_descriptors.empty()) { + return; + } + const spv::ImageFormat format{spv::ImageFormat::Unknown}; + image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); + sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); + + const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; + texture_buffers.reserve(info.texture_buffer_descriptors.size()); + for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { + if (desc.count != 1) { + throw NotImplementedException("Array of texture buffers"); + } + const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; + Decorate(id, spv::Decoration::Binding, binding); + Decorate(id, spv::Decoration::DescriptorSet, 0U); + Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); + texture_buffers.insert(texture_buffers.end(), desc.count, id); + if (profile.supported_spirv >= 0x00010400) { + interfaces.push_back(id); + } + binding += desc.count; + } +} + void EmitContext::DefineTextures(const Info& info, u32& binding) { textures.reserve(info.texture_descriptors.size()); for (const TextureDescriptor& desc : info.texture_descriptors) { @@ -544,32 +570,6 @@ void EmitContext::DefineTextures(const Info& info, u32& binding) { } } -void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { - if (info.texture_buffer_descriptors.empty()) { - return; - } - const spv::ImageFormat format{spv::ImageFormat::Unknown}; - image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); - sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); - - const Id type{TypePointer(spv::StorageClass::UniformConstant, 
sampled_texture_buffer_type)}; - texture_buffers.reserve(info.texture_buffer_descriptors.size()); - for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { - if (desc.count != 1) { - throw NotImplementedException("Array of texture buffers"); - } - const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; - Decorate(id, spv::Decoration::Binding, binding); - Decorate(id, spv::Decoration::DescriptorSet, 0U); - Name(id, fmt::format("texbuf{}_{:02x}", desc.cbuf_index, desc.cbuf_offset)); - texture_buffers.insert(texture_buffers.end(), desc.count, id); - if (profile.supported_spirv >= 0x00010400) { - interfaces.push_back(id); - } - binding += desc.count; - } -} - void EmitContext::DefineLabels(IR::Program& program) { for (IR::Block* const block : program.blocks) { block->SetDefinition(OpLabel()); diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 2a10e94e5..f1ac4430c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -154,8 +154,8 @@ private: void DefineSharedMemory(const IR::Program& program); void DefineConstantBuffers(const Info& info, u32& binding); void DefineStorageBuffers(const Info& info, u32& binding); - void DefineTextures(const Info& info, u32& binding); void DefineTextureBuffers(const Info& info, u32& binding); + void DefineTextures(const Info& info, u32& binding); void DefineAttributeMemAccess(const Info& info); void DefineLabels(IR::Program& program); diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index e6f0de8d8..4cc731198 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -119,8 +119,8 @@ struct Info { boost::container::static_vector constant_buffer_descriptors; boost::container::static_vector storage_buffers_descriptors; - TextureDescriptors texture_descriptors; TextureBufferDescriptors 
texture_buffer_descriptors; + TextureDescriptors texture_descriptors; }; } // namespace Shader diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7373cb62d..6701aab82 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -31,6 +31,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" @@ -42,11 +43,14 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; +using VideoCore::Surface::PixelFormat; + constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; constexpr u32 NUM_STORAGE_BUFFERS = 16; +constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; using namespace Common::Literals; @@ -66,6 +70,7 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -96,6 +101,10 @@ class BufferCache { BufferId buffer_id; }; + struct TextureBufferBinding : Binding { + PixelFormat format; + }; + static constexpr Binding NULL_BINDING{ .cpu_addr = 0, .size = 0, @@ -142,11 +151,21 @@ public: void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindGraphicsTextureBuffers(size_t stage); + + void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat 
format); + void UnbindComputeStorageBuffers(); void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindComputeTextureBuffers(); + + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format); + void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded @@ -254,12 +273,16 @@ private: void BindHostGraphicsStorageBuffers(size_t stage); + void BindHostGraphicsTextureBuffers(size_t stage); + void BindHostTransformFeedbackBuffers(); void BindHostComputeUniformBuffers(); void BindHostComputeStorageBuffers(); + void BindHostComputeTextureBuffers(); + void DoUpdateGraphicsBuffers(bool is_indexed); void DoUpdateComputeBuffers(); @@ -274,6 +297,8 @@ private: void UpdateStorageBuffers(size_t stage); + void UpdateTextureBuffers(size_t stage); + void UpdateTransformFeedbackBuffers(); void UpdateTransformFeedbackBuffer(u32 index); @@ -282,6 +307,8 @@ private: void UpdateComputeStorageBuffers(); + void UpdateComputeTextureBuffers(); + void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); @@ -323,6 +350,9 @@ private: [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; + [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, + PixelFormat format); + [[nodiscard]] std::span ImmediateBufferWithData(VAddr cpu_addr, size_t size); [[nodiscard]] std::span ImmediateBuffer(size_t wanted_capacity); @@ -347,10 +377,12 @@ private: std::array vertex_buffers; std::array, NUM_STAGES> uniform_buffers; std::array, NUM_STAGES> storage_buffers; + std::array, NUM_STAGES> texture_buffers; std::array transform_feedback_buffers; std::array compute_uniform_buffers; std::array compute_storage_buffers; + std::array compute_texture_buffers; std::array enabled_uniform_buffers{}; u32 enabled_compute_uniform_buffers = 0; @@ -360,6 +392,9 @@ private: u32 
enabled_compute_storage_buffers = 0; u32 written_compute_storage_buffers = 0; + std::array enabled_texture_buffers{}; + u32 enabled_compute_texture_buffers = 0; + std::array fast_bound_uniform_buffers{}; std::array uniform_cache_hits{}; @@ -619,6 +654,7 @@ void BufferCache

::BindHostStageBuffers(size_t stage) { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostGraphicsUniformBuffers(stage); BindHostGraphicsStorageBuffers(stage); + BindHostGraphicsTextureBuffers(stage); } template @@ -626,6 +662,7 @@ void BufferCache

::BindHostComputeBuffers() { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostComputeUniformBuffers(); BindHostComputeStorageBuffers(); + BindHostComputeTextureBuffers(); } template @@ -660,6 +697,18 @@ void BufferCache

::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr); } +template +void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { + enabled_texture_buffers[stage] = 0; +} + +template +void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, + u32 size, PixelFormat format) { + enabled_texture_buffers[stage] |= 1U << tbo_index; + texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + template void BufferCache

::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; @@ -680,6 +729,18 @@ void BufferCache

::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr); } +template +void BufferCache

::UnbindComputeTextureBuffers() { + enabled_compute_texture_buffers = 0; +} + +template +void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format) { + enabled_compute_texture_buffers |= 1U << tbo_index; + compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + template void BufferCache

::FlushCachedWrites() { for (const BufferId buffer_id : cached_write_buffer_ids) { @@ -988,6 +1049,26 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { }); } +template +void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { + u32 binding_index = 0; + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + const TextureBufferBinding& binding = texture_buffers[stage][index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { + runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); + ++binding_index; + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + template void BufferCache

::BindHostTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { @@ -1050,6 +1131,26 @@ void BufferCache

::BindHostComputeStorageBuffers() { }); } +template +void BufferCache

::BindHostComputeTextureBuffers() { + u32 binding_index = 0; + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + const TextureBufferBinding& binding = compute_texture_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { + runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); + ++binding_index; + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + template void BufferCache

::DoUpdateGraphicsBuffers(bool is_indexed) { if (is_indexed) { @@ -1060,6 +1161,7 @@ void BufferCache

::DoUpdateGraphicsBuffers(bool is_indexed) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { UpdateUniformBuffers(stage); UpdateStorageBuffers(stage); + UpdateTextureBuffers(stage); } } @@ -1067,6 +1169,7 @@ template void BufferCache

::DoUpdateComputeBuffers() { UpdateComputeUniformBuffers(); UpdateComputeStorageBuffers(); + UpdateComputeTextureBuffers(); } template @@ -1166,6 +1269,14 @@ void BufferCache

::UpdateStorageBuffers(size_t stage) { }); } +template +void BufferCache

::UpdateTextureBuffers(size_t stage) { + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + Binding& binding = texture_buffers[stage][index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + }); +} + template void BufferCache

::UpdateTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { @@ -1227,6 +1338,14 @@ void BufferCache

::UpdateComputeStorageBuffers() { }); } +template +void BufferCache

::UpdateComputeTextureBuffers() { + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + Binding& binding = compute_texture_buffers[index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + }); +} + template void BufferCache

::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { Buffer& buffer = slot_buffers[buffer_id]; @@ -1581,6 +1700,25 @@ typename BufferCache

::Binding BufferCache

::StorageBufferBinding(GPUVAddr s return binding; } +template +typename BufferCache

::TextureBufferBinding BufferCache

::GetTextureBufferBinding( + GPUVAddr gpu_addr, u32 size, PixelFormat format) { + const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + TextureBufferBinding binding; + if (!cpu_addr || size == 0) { + binding.cpu_addr = 0; + binding.size = 0; + binding.buffer_id = NULL_BUFFER_ID; + binding.format = PixelFormat::Invalid; + } else { + binding.cpu_addr = *cpu_addr; + binding.size = size; + binding.buffer_id = BufferId{}; + binding.format = format; + } + return binding; +} + template std::span BufferCache

::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index fe91aa452..ddcce5e97 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -155,6 +155,7 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ff0f03e99..a8bf84218 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1016,6 +1016,10 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info) + : VideoCommon::ImageViewBase{info, view_info} {} + ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index cf3b789e3..817b0e650 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -182,6 +182,8 @@ class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const 
VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index decf0d32c..cff93cc60 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -24,7 +24,8 @@ struct TextureHandle { [[likely]] if (via_header_index) { image = data; sampler = data; - } else { + } + else { const Tegra::Texture::TextureHandle handle{data}; image = handle.tic_id; sampler = via_header_index ? image : handle.tsc_id.Value(); @@ -90,12 +91,12 @@ public: for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) { Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage); } + for ([[maybe_unused]] const auto& desc : info.texture_buffer_descriptors) { + Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); + } for ([[maybe_unused]] const auto& desc : info.texture_descriptors) { Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage); } - for (const auto& desc : info.texture_buffer_descriptors) { - Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage); - } } private: @@ -156,20 +157,15 @@ inline VideoCommon::ImageViewType CastType(Shader::TextureType type) { return {}; } -inline void PushImageDescriptors(const Shader::Info& info, const VkSampler* samplers, - const ImageId* image_view_ids, TextureCache& texture_cache, - VKUpdateDescriptorQueue& update_descriptor_queue, size_t& index) { +inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, + const ImageId*& image_view_ids, TextureCache& texture_cache, + VKUpdateDescriptorQueue& update_descriptor_queue) { + image_view_ids += info.texture_buffer_descriptors.size(); for (const auto& desc : info.texture_descriptors) { - const VkSampler sampler{samplers[index]}; - ImageView& 
image_view{texture_cache.GetImageView(image_view_ids[index])}; + const VkSampler sampler{*(samplers++)}; + ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))}; update_descriptor_queue.AddSampledImage(vk_image_view, sampler); - ++index; - } - for (const auto& desc : info.texture_buffer_descriptors) { - ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])}; - update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); - ++index; } } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 0def1e769..cdda56ab1 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -67,25 +67,50 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) - : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_) { - buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = SizeBytes(), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); + : VideoCommon::BufferBase(rasterizer_, cpu_addr_, size_bytes_), + device{&runtime.device}, + buffer{device->GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = SizeBytes(), + .usage = 
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + })}, + commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { if (runtime.device.HasDebuggingToolAttached()) { buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); } - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); +} + +VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return *it->handle; + } + views.push_back({ + .offset = offset, + .size = size, + .format = format, + .handle = device->GetLogical().CreateBufferView({ + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = *buffer, + .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format, + .offset = offset, + .range = size, + }), + }); + return *views.back().handle; } BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3bb81d5b3..ea17406dc 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -9,6 +9,7 @@ #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" 
+#include "video_core/surface.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -26,6 +27,8 @@ public: explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_); + [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] VkBuffer Handle() const noexcept { return *buffer; } @@ -35,8 +38,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + vk::BufferView handle; + }; + + const Device* device{}; vk::Buffer buffer; MemoryCommit commit; + std::vector views; }; class BufferCacheRuntime { @@ -87,6 +99,11 @@ public: BindBuffer(buffer, offset, size); } + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format) { + update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); + } + private: void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { update_descriptor_queue.AddBuffer(buffer, offset, size); @@ -123,6 +140,7 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; + static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false; static constexpr bool USE_MEMORY_MAPS = true; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 9922cbd0f..ac47b1f3c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -80,8 +80,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, desc.is_written); ++ssbo_index; } - buffer_cache.UpdateComputeBuffers(); - buffer_cache.BindHostComputeBuffers(); texture_cache.SynchronizeComputeDescriptors(); 
@@ -99,6 +97,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -106,16 +108,26 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); samplers.push_back(sampler->Handle()); } - for (const auto& desc : info.texture_buffer_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); - } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); - size_t image_index{}; - PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, - update_descriptor_queue, image_index); + buffer_cache.UnbindComputeTextureBuffers(); + ImageId* texture_buffer_ids{image_view_ids.data()}; + size_t index{}; + for (const auto& desc : info.texture_buffer_descriptors) { + ASSERT(desc.count == 1); + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); + buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), + image_view.format); + ++texture_buffer_ids; + ++index; + } + buffer_cache.UpdateComputeBuffers(); + buffer_cache.BindHostComputeBuffers(); + + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; + PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); if 
(!is_built.load(std::memory_order::relaxed)) { // Wait for the pipeline to be built diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index afdd8b371..893258b4a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -175,6 +175,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { const u32 raw_handle{gpu_memory.Read(addr)}; return TextureHandle(raw_handle, via_header_index); }}; + for (const auto& desc : info.texture_buffer_descriptors) { + const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; + image_view_indices.push_back(handle.image); + } for (const auto& desc : info.texture_descriptors) { const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; image_view_indices.push_back(handle.image); @@ -182,24 +186,37 @@ void GraphicsPipeline::Configure(bool is_indexed) { Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } + } + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + size_t index{}; for (const auto& desc : info.texture_buffer_descriptors) { - const TextureHandle handle{read_handle(desc.cbuf_index, desc.cbuf_offset)}; - image_view_indices.push_back(handle.image); + ASSERT(desc.count == 1); + ImageView& image_view = texture_cache.GetImageView(*texture_buffer_index); + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format); + ++index; + ++texture_buffer_index; } + texture_buffer_index += 
info.texture_descriptors.size(); } - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); buffer_cache.UpdateGraphicsBuffers(is_indexed); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); buffer_cache.BindHostGeometryBuffers(is_indexed); update_descriptor_queue.Acquire(); - size_t index{}; + const VkSampler* samplers_it{samplers.data()}; + const ImageId* views_it{image_view_ids.data()}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { buffer_cache.BindHostStageBuffers(stage); - PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), - texture_cache, update_descriptor_queue, index); + PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, + update_descriptor_queue); } texture_cache.UpdateRenderTargets(false); scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1bbc542a1..e42b091c5 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -15,10 +15,10 @@ #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/vk_render_pass_cache.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -162,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array& color) { return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); } -[[nodiscard]] 
vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { - if (info.type != ImageType::Buffer) { - return vk::Buffer{}; - } - const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); - return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = info.size.width * bytes_per_block, - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }); -} - [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { switch (VideoCore::Surface::GetFormatType(format)) { case VideoCore::Surface::SurfaceType::ColorTexture: @@ -813,13 +794,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, - image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), + image(MakeImage(runtime.device, info)), + commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), aspect_mask(ImageAspectMask(info.format)) { - if (image) { - commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal); - } else { - commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); - } if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { if (Settings::values.accelerate_astc.GetValue()) { flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; @@ -828,11 +805,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } if (runtime.device.HasDebuggingToolAttached()) { - if (image) { - image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } else { 
- buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); - } + image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); } static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, @@ -884,19 +857,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { - // TODO: Move this to another API - scheduler->RequestOutsideRenderPassOperationContext(); - std::vector vk_copies = TransformBufferCopies(copies, map.offset); - const VkBuffer src_buffer = map.buffer; - const VkBuffer dst_buffer = *buffer; - scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { - // TODO: Barriers - cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); - }); -} - void Image::DownloadMemory(const StagingBufferRef& map, std::span copies) { std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); scheduler->RequestOutsideRenderPassOperationContext(); @@ -1032,19 +992,16 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI UNIMPLEMENTED(); break; case VideoCommon::ImageViewType::Buffer: - buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .buffer = image.Buffer(), - .format = format_info.format, - .offset = 0, // TODO: Redesign buffer cache to support this - .range = image.guest_size_bytes, - }); + UNREACHABLE(); break; } } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params} {} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h 
b/src/video_core/renderer_vulkan/vk_texture_cache.h index 189ee5a68..498e76a1c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -41,9 +41,9 @@ struct TextureCacheRuntime { void Finish(); - [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); + StagingBufferRef UploadStagingBuffer(size_t size); - [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); + StagingBufferRef DownloadStagingBuffer(size_t size); void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, const Region2D& dst_region, const Region2D& src_region, @@ -54,7 +54,7 @@ struct TextureCacheRuntime { void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); - [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { + bool CanAccelerateImageUpload(Image&) const noexcept { return false; } @@ -92,8 +92,6 @@ public: void UploadMemory(const StagingBufferRef& map, std::span copies); - void UploadMemory(const StagingBufferRef& map, std::span copies); - void DownloadMemory(const StagingBufferRef& map, std::span copies); @@ -101,10 +99,6 @@ public: return *image; } - [[nodiscard]] VkBuffer Buffer() const noexcept { - return *buffer; - } - [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { return aspect_mask; } @@ -121,7 +115,6 @@ public: private: VKScheduler* scheduler; vk::Image image; - vk::Buffer buffer; MemoryCommit commit; vk::ImageView image_view; std::vector storage_image_views; @@ -132,6 +125,8 @@ private: class ImageView : public VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); [[nodiscard]] VkImageView DepthView(); @@ -142,10 +137,6 @@ public: return 
*image_views[static_cast(query_type)]; } - [[nodiscard]] VkBufferView BufferView() const noexcept { - return *buffer_view; - } - [[nodiscard]] VkImage ImageHandle() const noexcept { return image_handle; } @@ -162,6 +153,14 @@ public: return samples; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); @@ -169,11 +168,12 @@ private: std::array image_views; vk::ImageView depth_view; vk::ImageView stencil_view; - vk::BufferView buffer_view; VkImage image_handle = VK_NULL_HANDLE; VkImageView render_target = VK_NULL_HANDLE; PixelFormat image_format = PixelFormat::Invalid; VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp index e8d632f9e..450becbeb 100644 --- a/src/video_core/texture_cache/image_view_base.cpp +++ b/src/video_core/texture_cache/image_view_base.cpp @@ -36,6 +36,15 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i } } +ImageViewBase::ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info) + : format{info.format}, type{ImageViewType::Buffer}, size{ + .width = info.size.width, + .height = 1, + .depth = 1, + } { + ASSERT_MSG(view_info.type == ImageViewType::Buffer, "Expected texture buffer"); +} + ImageViewBase::ImageViewBase(const NullImageParams&) {} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h index 73954167e..903f715c5 100644 --- a/src/video_core/texture_cache/image_view_base.h +++ b/src/video_core/texture_cache/image_view_base.h @@ -27,6 +27,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) struct 
ImageViewBase { explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, ImageId image_id); + explicit ImageViewBase(const ImageInfo& info, const ImageViewInfo& view_info); explicit ImageViewBase(const NullImageParams&); [[nodiscard]] bool IsBuffer() const noexcept { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 85ce06d56..5e8d99482 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -968,9 +968,6 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); ConvertImage(unswizzled_data, image.info, mapped_span, copies); image.UploadMemory(staging, copies); - } else if (image.info.type == ImageType::Buffer) { - const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; - image.UploadMemory(staging, copies); } else { const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); image.UploadMemory(staging, copies); @@ -993,7 +990,12 @@ ImageViewId TextureCache

::FindImageView(const TICEntry& config) { template ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { const ImageInfo info(config); - const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; + if (info.type == ImageType::Buffer) { + const ImageViewInfo view_info(config, 0); + return slot_image_views.insert(runtime, info, view_info, config.Address()); + } + const u32 layer_offset = config.BaseLayer() * info.layer_stride; + const GPUVAddr image_gpu_addr = config.Address() - layer_offset; const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); if (!image_id) { return NULL_IMAGE_VIEW_ID; @@ -1801,6 +1803,9 @@ void TextureCache

::PrepareImageView(ImageViewId image_view_id, bool is_modifi return; } const ImageViewBase& image_view = slot_image_views[image_view_id]; + if (image_view.IsBuffer()) { + return; + } PrepareImage(image_view.image_id, is_modification, invalidate); } -- cgit v1.2.3 From f4ace63957ee47c4e3e913954f07375d0391beae Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 24 Apr 2021 18:27:25 -0300 Subject: shader: Accelerate pipeline transitions and use dirty flags for shaders --- src/video_core/dirty_flags.cpp | 6 +++ src/video_core/dirty_flags.h | 2 + src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_state_tracker.cpp | 6 --- src/video_core/renderer_opengl/gl_state_tracker.h | 1 - .../renderer_vulkan/vk_graphics_pipeline.cpp | 46 +++++++++--------- .../renderer_vulkan/vk_graphics_pipeline.h | 54 +++++++++++++++++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 31 ++++++++++--- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 ++---------- 9 files changed, 114 insertions(+), 64 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 7149af290..b1be065c3 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -58,6 +58,11 @@ void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) { FillBlock(table, OFF(zeta), NUM(zeta), flag); } } + +void SetupDirtyShaders(Maxwell3D::DirtyState::Tables& tables) { + FillBlock(tables[0], OFF(shader_config[0]), + NUM(shader_config[0]) * Maxwell3D::Regs::MaxShaderProgram, Shaders); +} } // Anonymous namespace void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { @@ -65,6 +70,7 @@ void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { SetupIndexBuffer(tables); SetupDirtyDescriptors(tables); SetupDirtyRenderTargets(tables); + SetupDirtyShaders(tables); } } // namespace VideoCommon::Dirty diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 
702688ace..504465d3f 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -36,6 +36,8 @@ enum : u8 { IndexBuffer, + Shaders, + LastCommonEntry, }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3551dbdcc..dd1937863 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -635,7 +635,7 @@ void RasterizerOpenGL::SyncDepthClamp() { void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { + if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) { return; } flags[Dirty::ClipDistances] = false; diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index dbdf5230f..586da84e3 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) { FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); } -void SetupDirtyShaders(Tables& tables) { - FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram, - Shaders); -} - void SetupDirtyPolygonModes(Tables& tables) { tables[0][OFF(polygon_mode_front)] = PolygonModeFront; tables[0][OFF(polygon_mode_back)] = PolygonModeBack; @@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} SetupDirtyScissors(tables); SetupDirtyVertexInstances(tables); SetupDirtyVertexFormat(tables); - SetupDirtyShaders(tables); SetupDirtyPolygonModes(tables); SetupDirtyDepthTest(tables); SetupDirtyStencilTest(tables); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 94c905116..5864c7c07 100644 --- 
a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -52,7 +52,6 @@ enum : u8 { BlendState0, BlendState7 = BlendState0 + 7, - Shaders, ClipDistances, PolygonModes, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2bc1f67ae..100a5e07a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -125,13 +125,12 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, - const FixedPipelineState& state_, + const GraphicsPipelineCacheKey& key_, std::array stages, const std::array& infos) - : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, + : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, - update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{ - std::move(stages)} { + update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? 
*info : Shader::Info{}; }); @@ -144,7 +143,7 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, pipeline_layout = builder.CreatePipelineLayout(set_layout); descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; MakePipeline(device, render_pass); std::lock_guard lock{build_mutex}; @@ -158,6 +157,11 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, } } +void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { + transition_keys.push_back(transition->key); + transitions.push_back(transition); +} + void GraphicsPipeline::Configure(bool is_indexed) { static constexpr size_t max_images_elements = 64; std::array image_view_ids; @@ -294,12 +298,12 @@ void GraphicsPipeline::Configure(bool is_indexed) { void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { - dynamic = state.dynamic_state; + dynamic = key.state.dynamic_state; } static_vector vertex_bindings; static_vector vertex_binding_divisors; for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - const bool instanced = state.binding_divisors[index] != 0; + const bool instanced = key.state.binding_divisors[index] != 0; const auto rate = instanced ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; vertex_bindings.push_back({ .binding = static_cast(index), @@ -309,14 +313,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (instanced) { vertex_binding_divisors.push_back({ .binding = static_cast(index), - .divisor = state.binding_divisors[index], + .divisor = key.state.binding_divisors[index], }); } } static_vector vertex_attributes; const auto& input_attributes = stage_infos[0].input_generics; - for (size_t index = 0; index < state.attributes.size(); ++index) { - const auto& attribute = state.attributes[index]; + for (size_t index = 0; index < key.state.attributes.size(); ++index) { + const auto& attribute = key.state.attributes[index]; if (!attribute.enabled || !input_attributes[index].used) { continue; } @@ -345,7 +349,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa if (!vertex_binding_divisors.empty()) { vertex_input_ci.pNext = &input_divisor_ci; } - auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); + auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology); if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { if (!spv_modules[1] && !spv_modules[2]) { LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); @@ -357,14 +361,14 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .topology = input_assembly_topology, - .primitiveRestartEnable = state.primitive_restart_enable != 0 && + .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; const VkPipelineTessellationStateCreateInfo tessellation_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, + 
.patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, }; VkPipelineViewportStateCreateInfo viewport_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, @@ -376,7 +380,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pScissors = nullptr, }; std::array swizzles; - std::ranges::transform(state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); + std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, .pNext = nullptr, @@ -393,15 +397,15 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .pNext = nullptr, .flags = 0, .depthClampEnable = - static_cast(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), .rasterizerDiscardEnable = - static_cast(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), + static_cast(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), .polygonMode = - MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(state.polygon_mode)), + MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)), .cullMode = static_cast( dynamic.cull_enable ? 
MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), - .depthBiasEnable = state.depth_bias_enable, + .depthBiasEnable = key.state.depth_bias_enable, .depthBiasConstantFactor = 0.0f, .depthBiasClamp = 0.0f, .depthBiasSlopeFactor = 0.0f, @@ -411,7 +415,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .rasterizationSamples = MaxwellToVK::MsaaMode(state.msaa_mode), + .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode), .sampleShadingEnable = VK_FALSE, .minSampleShading = 0.0f, .pSampleMask = nullptr, @@ -435,7 +439,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa .maxDepthBounds = 0.0f, }; static_vector cb_attachments; - const size_t num_attachments{NumAttachments(state)}; + const size_t num_attachments{NumAttachments(key.state)}; for (size_t index = 0; index < num_attachments; ++index) { static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, @@ -443,7 +447,7 @@ void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pa VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; - const auto& blend{state.attachments[index]}; + const auto& blend{key.state.attachments[index]}; const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; for (size_t i = 0; i < mask_table.size(); ++i) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 7d14d2378..fd787840b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -4,10 +4,12 @@ #pragma once +#include #include #include #include #include +#include #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" @@ -20,6 +22,39 @@ namespace Vulkan { +struct 
GraphicsPipelineCacheKey { + std::array unique_hashes; + FixedPipelineState state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + size_t Size() const noexcept { + return sizeof(unique_hashes) + state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +} // namespace Vulkan + +namespace std { +template <> +struct hash { + size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std + +namespace Vulkan { + class Device; class RenderPassCache; class VKScheduler; @@ -35,7 +70,8 @@ public: const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, - RenderPassCache& render_pass_cache, const FixedPipelineState& state, + RenderPassCache& render_pass_cache, + const GraphicsPipelineCacheKey& key, std::array stages, const std::array& infos); @@ -47,16 +83,30 @@ public: GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; + void AddTransition(GraphicsPipeline* transition); + + GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { + if (key == current_key) { + return this; + } + const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)}; + return it != transition_keys.end() ? 
transitions[std::distance(transition_keys.begin(), it)] + : nullptr; + } + private: void MakePipeline(const Device& device, VkRenderPass render_pass); + const GraphicsPipelineCacheKey key; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::MemoryManager& gpu_memory; TextureCache& texture_cache; BufferCache& buffer_cache; VKScheduler& scheduler; VKUpdateDescriptorQueue& update_descriptor_queue; - const FixedPipelineState state; + + std::vector transition_keys; + std::vector transitions; std::array spv_modules; std::array stage_infos; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4317b2ac7..2bd870060 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -21,6 +21,7 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/frontend/maxwell/program.h" #include "shader_recompiler/program_header.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -700,17 +701,28 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (!RefreshStages()) { + current_pipeline = nullptr; return nullptr; } graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); + if (current_pipeline) { + GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; + if (next) { + current_pipeline = next; + return current_pipeline; + } + } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; - if (!is_new) { - return pipeline.get(); + if (is_new) { + pipeline = CreateGraphicsPipeline(); } - pipeline = CreateGraphicsPipeline(); - return pipeline.get(); + if (current_pipeline) { + current_pipeline->AddTransition(pipeline.get()); + } + current_pipeline = pipeline.get(); + return 
current_pipeline; } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -743,6 +755,12 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { } bool PipelineCache::RefreshStages() { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_valid_shaders; + } + dirty[VideoCommon::Dirty::Shaders] = false; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { @@ -755,6 +773,7 @@ bool PipelineCache::RefreshStages() { const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; if (!cpu_shader_addr) { LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); + last_valid_shaders = false; return false; } const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; @@ -766,6 +785,7 @@ bool PipelineCache::RefreshStages() { shader_infos[index] = shader_info; graphics_key.unique_hashes[index] = shader_info->unique_hash; } + last_valid_shaders = true; return true; } @@ -832,8 +852,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; return std::make_unique( maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, - update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules), - infos); + update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), infos); } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e12e4422f..ad569acc4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -58,26 +58,6 @@ static_assert(std::has_unique_object_representations_v) static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); -struct GraphicsPipelineCacheKey { - std::array unique_hashes; - FixedPipelineState state; - - size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - size_t Size() const noexcept { - return sizeof(unique_hashes) + state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - } // namespace Vulkan namespace std { @@ -89,13 +69,6 @@ struct hash { } }; -template <> -struct hash { - size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { - return k.Hash(); - } -}; - } // namespace std namespace Vulkan { @@ -181,7 +154,10 @@ private: TextureCache& texture_cache; GraphicsPipelineCacheKey graphics_key{}; + GraphicsPipeline* current_pipeline{}; + std::array shader_infos{}; + bool last_valid_shaders{}; std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3 From 025b20f96ae588777e3ff11083cc4184bf418af6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp 
Date: Mon, 26 Apr 2021 03:53:26 -0300 Subject: shader: Move pipeline cache logic to separate files Move code to separate files to be able to reuse it from OpenGL. This greatly simplifies the pipeline cache logic on Vulkan. Transform feedback state is not yet abstracted and it's still intrusively stored inside vk_pipeline_cache. It will be moved when needed on OpenGL. --- src/video_core/CMakeLists.txt | 3 + src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 21 +- src/video_core/renderer_opengl/gl_shader_cache.h | 58 +- .../renderer_vulkan/vk_graphics_pipeline.h | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 719 +++------------------ src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/shader_cache.cpp | 233 +++++++ src/video_core/shader_cache.h | 198 ++---- src/video_core/shader_environment.cpp | 453 +++++++++++++ src/video_core/shader_environment.h | 198 ++++++ 12 files changed, 1095 insertions(+), 824 deletions(-) create mode 100644 src/video_core/shader_cache.cpp create mode 100644 src/video_core/shader_environment.cpp create mode 100644 src/video_core/shader_environment.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3166a69dc..6e0e4b8f5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -145,7 +145,10 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h + shader_cache.cpp shader_cache.h + shader_environment.cpp + shader_environment.h shader_notify.cpp shader_notify.h surface.cpp diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1f58f8791..2fdcbe4ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h 
@@ -217,7 +217,7 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - ShaderCacheOpenGL shader_cache; + ShaderCache shader_cache; QueryCache query_cache; AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4dd166156..c3e490b40 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -29,18 +29,13 @@ namespace OpenGL { -Shader::Shader() = default; - -Shader::~Shader() = default; - -ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) - : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_}, - maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {} - -ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; +ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_) + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + emu_window{emu_window_}, gpu{gpu_}, device{device_} {} + +ShaderCache::~ShaderCache() = default; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index ad3d15a76..96520e17c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -36,27 +36,59 @@ class RasterizerOpenGL; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -class Shader { +struct 
GraphicsProgramKey { + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + }; + + std::array unique_hashes; + std::array color_formats; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, u32> tessellation_primitive; + BitField<8, 2, u32> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + u32 padding; + TransformFeedbackState xfb_state; + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsProgramKey); + } else { + return offsetof(GraphicsProgramKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsProgram { public: - explicit Shader(); - ~Shader(); +private: }; -class ShaderCacheOpenGL final : public VideoCommon::ShaderCache { +class ShaderCache : public VideoCommon::ShaderCache { public: - explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); - ~ShaderCacheOpenGL() override; + explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_); + ~ShaderCache(); private: Core::Frontend::EmuWindow& emu_window; Tegra::GPU& gpu; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; }; diff --git 
a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 85e21f611..e362d13c5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -23,7 +23,7 @@ namespace Vulkan { struct GraphicsPipelineCacheKey { - std::array unique_hashes; + std::array unique_hashes; FixedPipelineState state; size_t Hash() const noexcept; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9d9729022..0822862fe 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -11,7 +11,8 @@ #include "common/bit_cast.h" #include "common/cityhash.h" -#include "common/file_util.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/microprofile.h" #include "common/thread_worker.h" #include "core/core.h" @@ -36,6 +37,7 @@ #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -43,449 +45,19 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -template -auto MakeSpan(Container& container) { - return std::span(container.data(), container.size()); -} - -static u64 MakeCbufKey(u32 index, u32 offset) { - return (static_cast(index) << 32) | offset; -} - -class GenericEnvironment : public Shader::Environment { -public: - explicit GenericEnvironment() = default; - explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, - u32 start_address_) - : gpu_memory{&gpu_memory_}, program_base{program_base_} { - start_address = start_address_; - } - - ~GenericEnvironment() override = default; - - 
u32 TextureBoundBuffer() const final { - return texture_bound; - } - - u32 LocalMemorySize() const final { - return local_memory_size; - } - - u32 SharedMemorySize() const final { - return shared_memory_size; - } - - std::array WorkgroupSize() const final { - return workgroup_size; - } - - u64 ReadInstruction(u32 address) final { - read_lowest = std::min(read_lowest, address); - read_highest = std::max(read_highest, address); - - if (address >= cached_lowest && address < cached_highest) { - return code[(address - cached_lowest) / INST_SIZE]; - } - has_unbound_instructions = true; - return gpu_memory->Read(program_base + address); - } - - std::optional Analyze() { - const std::optional size{TryFindSize()}; - if (!size) { - return std::nullopt; - } - cached_lowest = start_address; - cached_highest = start_address + static_cast(*size); - return Common::CityHash128(reinterpret_cast(code.data()), *size); - } - - void SetCachedSize(size_t size_bytes) { - cached_lowest = start_address; - cached_highest = start_address + static_cast(size_bytes); - code.resize(CachedSize()); - gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); - } - - [[nodiscard]] size_t CachedSize() const noexcept { - return cached_highest - cached_lowest + INST_SIZE; - } - - [[nodiscard]] size_t ReadSize() const noexcept { - return read_highest - read_lowest + INST_SIZE; - } - - [[nodiscard]] bool CanBeSerialized() const noexcept { - return !has_unbound_instructions; - } - - [[nodiscard]] u128 CalculateHash() const { - const size_t size{ReadSize()}; - const auto data{std::make_unique(size)}; - gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); - return Common::CityHash128(data.get(), size); - } - - void Serialize(std::ofstream& file) const { - const u64 code_size{static_cast(CachedSize())}; - const u64 num_texture_types{static_cast(texture_types.size())}; - const u64 num_cbuf_values{static_cast(cbuf_values.size())}; - - 
file.write(reinterpret_cast(&code_size), sizeof(code_size)) - .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) - .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) - .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) - .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) - .write(reinterpret_cast(&start_address), sizeof(start_address)) - .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) - .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) - .write(reinterpret_cast(&stage), sizeof(stage)) - .write(reinterpret_cast(code.data()), code_size); - for (const auto [key, type] : texture_types) { - file.write(reinterpret_cast(&key), sizeof(key)) - .write(reinterpret_cast(&type), sizeof(type)); - } - for (const auto [key, type] : cbuf_values) { - file.write(reinterpret_cast(&key), sizeof(key)) - .write(reinterpret_cast(&type), sizeof(type)); - } - if (stage == Shader::Stage::Compute) { - file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) - .write(reinterpret_cast(&shared_memory_size), - sizeof(shared_memory_size)); - } else { - file.write(reinterpret_cast(&sph), sizeof(sph)); - } - } - -protected: - static constexpr size_t INST_SIZE = sizeof(u64); - - std::optional TryFindSize() { - constexpr size_t BLOCK_SIZE = 0x1000; - constexpr size_t MAXIMUM_SIZE = 0x100000; - - constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - - GPUVAddr guest_addr{program_base + start_address}; - size_t offset{0}; - size_t size{BLOCK_SIZE}; - while (size <= MAXIMUM_SIZE) { - code.resize(size / INST_SIZE); - u64* const data = code.data() + offset / INST_SIZE; - gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); - for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { - const u64 inst = data[index / INST_SIZE]; - if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { - return offset + index; - } - } - guest_addr += 
BLOCK_SIZE; - size += BLOCK_SIZE; - offset += BLOCK_SIZE; - } - return std::nullopt; - } - - Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - u32 raw) { - const TextureHandle handle{raw, via_header_index}; - const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; - Tegra::Texture::TICEntry entry; - gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); - - const Shader::TextureType result{[&] { - switch (entry.texture_type) { - case Tegra::Texture::TextureType::Texture1D: - return Shader::TextureType::Color1D; - case Tegra::Texture::TextureType::Texture2D: - case Tegra::Texture::TextureType::Texture2DNoMipmap: - return Shader::TextureType::Color2D; - case Tegra::Texture::TextureType::Texture3D: - return Shader::TextureType::Color3D; - case Tegra::Texture::TextureType::TextureCubemap: - return Shader::TextureType::ColorCube; - case Tegra::Texture::TextureType::Texture1DArray: - return Shader::TextureType::ColorArray1D; - case Tegra::Texture::TextureType::Texture2DArray: - return Shader::TextureType::ColorArray2D; - case Tegra::Texture::TextureType::Texture1DBuffer: - return Shader::TextureType::Buffer; - case Tegra::Texture::TextureType::TextureCubeArray: - return Shader::TextureType::ColorArrayCube; - default: - throw Shader::NotImplementedException("Unknown texture type"); - } - }()}; - texture_types.emplace(raw, result); - return result; - } - - Tegra::MemoryManager* gpu_memory{}; - GPUVAddr program_base{}; - - std::vector code; - std::unordered_map texture_types; - std::unordered_map cbuf_values; - - u32 local_memory_size{}; - u32 texture_bound{}; - u32 shared_memory_size{}; - std::array workgroup_size{}; - - u32 read_lowest = std::numeric_limits::max(); - u32 read_highest = 0; - - u32 cached_lowest = std::numeric_limits::max(); - u32 cached_highest = 0; - - bool has_unbound_instructions = false; -}; - namespace { using Shader::Backend::SPIRV::EmitSPIRV; using 
Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; +using VideoCommon::GraphicsEnvironment; -// TODO: Move this to a separate file -constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION{2}; - -class GraphicsEnvironment final : public GenericEnvironment { -public: - explicit GraphicsEnvironment() = default; - explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, - GPUVAddr program_base_, u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { - gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); - switch (program) { - case Maxwell::ShaderProgram::VertexA: - stage = Shader::Stage::VertexA; - stage_index = 0; - break; - case Maxwell::ShaderProgram::VertexB: - stage = Shader::Stage::VertexB; - stage_index = 0; - break; - case Maxwell::ShaderProgram::TesselationControl: - stage = Shader::Stage::TessellationControl; - stage_index = 1; - break; - case Maxwell::ShaderProgram::TesselationEval: - stage = Shader::Stage::TessellationEval; - stage_index = 2; - break; - case Maxwell::ShaderProgram::Geometry: - stage = Shader::Stage::Geometry; - stage_index = 3; - break; - case Maxwell::ShaderProgram::Fragment: - stage = Shader::Stage::Fragment; - stage_index = 4; - break; - default: - UNREACHABLE_MSG("Invalid program={}", program); - break; - } - const u64 local_size{sph.LocalMemorySize()}; - ASSERT(local_size <= std::numeric_limits::max()); - local_memory_size = static_cast(local_size); - texture_bound = maxwell3d->regs.tex_cb_index; - } - - ~GraphicsEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; - ASSERT(cbuf.enabled); - u32 
value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read(cbuf.address + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{maxwell3d->regs}; - const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); - } - -private: - Tegra::Engines::Maxwell3D* maxwell3d{}; - size_t stage_index{}; -}; - -class ComputeEnvironment final : public GenericEnvironment { -public: - explicit ComputeEnvironment() = default; - explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, - u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ - &kepler_compute_} { - const auto& qmd{kepler_compute->launch_description}; - stage = Shader::Stage::Compute; - local_memory_size = qmd.local_pos_alloc; - texture_bound = kepler_compute->regs.tex_cb_index; - shared_memory_size = qmd.shared_alloc; - workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; - } - - ~ComputeEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& qmd{kepler_compute->launch_description}; - ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); - const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; - u32 value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read(cbuf.Address() + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{kepler_compute->regs}; - const auto& qmd{kepler_compute->launch_description}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); - 
} - -private: - Tegra::Engines::KeplerCompute* kepler_compute{}; -}; - -void SerializePipeline(std::span key, std::span envs, - std::ofstream& file) { - if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { - return; - } - const u32 num_envs{static_cast(envs.size())}; - file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); - for (const GenericEnvironment* const env : envs) { - env->Serialize(file); - } - file.write(key.data(), key.size_bytes()); -} - -template -void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { - try { - std::ofstream file; - file.exceptions(std::ifstream::failbit); - Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app); - if (!file.is_open()) { - LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); - return; - } - if (file.tellp() == 0) { - file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) - .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); - } - const std::span key_span(reinterpret_cast(&key), sizeof(key)); - SerializePipeline(key_span, MakeSpan(envs), file); - - } catch (const std::ios_base::failure& e) { - LOG_ERROR(Common_Filesystem, "{}", e.what()); - if (!Common::FS::Delete(filename)) { - LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); - } - } +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); } -class FileEnvironment final : public Shader::Environment { -public: - void Deserialize(std::ifstream& file) { - u64 code_size{}; - u64 num_texture_types{}; - u64 num_cbuf_values{}; - file.read(reinterpret_cast(&code_size), sizeof(code_size)) - .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) - .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) - .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) - .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) - 
.read(reinterpret_cast(&start_address), sizeof(start_address)) - .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) - .read(reinterpret_cast(&read_highest), sizeof(read_highest)) - .read(reinterpret_cast(&stage), sizeof(stage)); - code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); - file.read(reinterpret_cast(code.get()), code_size); - for (size_t i = 0; i < num_texture_types; ++i) { - u32 key; - Shader::TextureType type; - file.read(reinterpret_cast(&key), sizeof(key)) - .read(reinterpret_cast(&type), sizeof(type)); - texture_types.emplace(key, type); - } - for (size_t i = 0; i < num_cbuf_values; ++i) { - u64 key; - u32 value; - file.read(reinterpret_cast(&key), sizeof(key)) - .read(reinterpret_cast(&value), sizeof(value)); - cbuf_values.emplace(key, value); - } - if (stage == Shader::Stage::Compute) { - file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) - .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); - } else { - file.read(reinterpret_cast(&sph), sizeof(sph)); - } - } - - u64 ReadInstruction(u32 address) override { - if (address < read_lowest || address > read_highest) { - throw Shader::LogicError("Out of bounds address {}", address); - } - return code[(address - read_lowest) / sizeof(u64)]; - } - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; - if (it == cbuf_values.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto it{texture_types.find(handle)}; - if (it == texture_types.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - u32 LocalMemorySize() const override { - return local_memory_size; - } - - u32 SharedMemorySize() const override { - return shared_memory_size; - } - - u32 TextureBoundBuffer() const override { - return texture_bound; - 
} - - std::array WorkgroupSize() const override { - return workgroup_size; - } - -private: - std::unique_ptr code; - std::unordered_map texture_types; - std::unordered_map cbuf_values; - std::array workgroup_size{}; - u32 local_memory_size{}; - u32 shared_memory_size{}; - u32 texture_bound{}; - u32 read_lowest{}; - u32 read_highest{}; -}; - Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { switch (comparison) { case Maxwell::ComparisonOp::Never: @@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso } } // Anonymous namespace -void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - if (title_id == 0) { - return; - } - std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; - std::string base_dir{shader_dir + "/vulkan"}; - std::string transferable_dir{base_dir + "/transferable"}; - std::string precompiled_dir{base_dir + "/precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); - return; - } - pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); - - struct { - std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; - } state; - - std::ifstream file; - Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate); - if (!file.is_open()) { - return; - } - file.exceptions(std::ifstream::failbit); - const auto end{file.tellg()}; - file.seekg(0, std::ios::beg); - - std::array magic_number; - u32 cache_version; - file.read(magic_number.data(), magic_number.size()) - .read(reinterpret_cast(&cache_version), sizeof(cache_version)); - if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) 
{ - file.close(); - if (Common::FS::Delete(pipeline_cache_filename)) { - if (magic_number != MAGIC_NUMBER) { - LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file"); - } - if (cache_version != CACHE_VERSION) { - LOG_INFO(Render_Vulkan, "Deleting old pipeline cache"); - } - } else { - LOG_ERROR(Render_Vulkan, - "Invalid pipeline cache file and failed to delete it in \"{}\"", - pipeline_cache_filename); - } - return; - } - while (file.tellg() != end) { - if (stop_loading) { - return; - } - u32 num_envs{}; - file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); - std::vector envs(num_envs); - for (FileEnvironment& env : envs) { - env.Deserialize(file); - } - if (envs.front().ShaderStage() == Shader::Stage::Compute) { - ComputePipelineCacheKey key; - file.read(reinterpret_cast(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } else { - GraphicsPipelineCacheKey key; - file.read(reinterpret_cast(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - boost::container::static_vector env_ptrs; - for (auto& env : envs) { - env_ptrs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } - ++state.total; - } - { - std::lock_guard lock{state.mutex}; - callback(VideoCore::LoadCallbackStage::Build, 0, state.total); - state.has_loaded = true; - } - 
workers.WaitForRequests(); -} - size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); return static_cast(hash); @@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c return std::memcmp(&rhs, this, Size()) == 0; } -PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_) - : VideoCommon::ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), @@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default; GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - if (!RefreshStages()) { + if (!RefreshStages(graphics_key.unique_hashes)) { current_pipeline = nullptr; return nullptr; } @@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const GPUVAddr 
program_base{kepler_compute.regs.code_loc.Address()}; - const auto& qmd{kepler_compute.launch_description}; - const GPUVAddr shader_addr{program_base + qmd.program_start}; - const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { - return nullptr; - } - const ShaderInfo* shader{TryGet(*cpu_shader_addr)}; + const ShaderInfo* const shader{ComputeShader()}; if (!shader) { - ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; - shader = MakeShaderInfo(env, *cpu_shader_addr); + return nullptr; } + const auto& qmd{kepler_compute.launch_description}; const ComputePipelineCacheKey key{ - .unique_hash{shader->unique_hash}, - .shared_memory_size{qmd.shared_alloc}, + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; @@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return pipeline.get(); } -bool PipelineCache::RefreshStages() { - auto& dirty{maxwell3d.dirty.flags}; - if (!dirty[VideoCommon::Dirty::Shaders]) { - return last_valid_shaders; +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; } - dirty[VideoCommon::Dirty::Shaders] = false; - - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - graphics_key.unique_hashes[index] = u128{}; - continue; - } - const auto& shader_config{maxwell3d.regs.shader_config[index]}; - const auto program{static_cast(index)}; - const GPUVAddr shader_addr{base_addr + shader_config.offset}; - const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { - LOG_ERROR(Render_Vulkan, 
"Invalid GPU address for shader 0x{:016x}", shader_addr); - last_valid_shaders = false; - return false; - } - const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; - if (!shader_info) { - const u32 start_address{shader_config.offset}; - GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; - shader_info = MakeShaderInfo(env, *cpu_shader_addr); - } - shader_infos[index] = shader_info; - graphics_key.unique_hashes[index] = shader_info->unique_hash; + auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + auto base_dir{shader_dir / "vulkan"}; + auto transferable_dir{base_dir / "transferable"}; + auto precompiled_dir{base_dir / "precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; } - last_valid_shaders = true; - return true; -} + pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); -const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { - auto info = std::make_unique(); - if (const std::optional cached_hash{env.Analyze()}) { - info->unique_hash = *cached_hash; - info->size_bytes = env.CachedSize(); - } else { - // Slow path, not really hit on commercial games - // Build a control flow graph to get the real shader size - main_pools.flow_block.ReleaseContents(); - Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; - info->unique_hash = env.CalculateHash(); - info->size_bytes = env.ReadSize(); - } - const size_t size_bytes{info->size_bytes}; - const ShaderInfo* const result{info.get()}; - Register(std::move(info), cpu_addr, size_bytes); - return result; + struct { + std::mutex mutex; + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + const auto load_compute{[&](std::ifstream& file, 
FileEnvironment env) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { + ShaderPools pools; + auto pipeline{CreateComputePipeline(pools, key, env, false)}; + + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector envs) { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { + ShaderPools pools; + boost::container::static_vector env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; + + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); + + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); + + workers.WaitForRequests(); } std::unique_ptr PipelineCache::CreateGraphicsPipeline( @@ -815,7 +288,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( size_t env_index{0}; std::array programs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } Shader::Environment& env{*envs[env_index]}; @@ -830,7 +303,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( u32 binding{0}; for (size_t index = 0; index < 
Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } UNIMPLEMENTED_IF(index == 0); @@ -844,8 +317,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { - const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], - key.unique_hashes[index][1])}; + const std::string name{fmt::format("{:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } } @@ -863,7 +335,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (graphics_key.unique_hashes[index] == 0) { continue; } const auto program{static_cast(index)}; @@ -871,7 +343,6 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); } auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; @@ -882,11 +353,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { boost::container::static_vector env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] != u128{}) { + if (key.unique_hashes[index] != 0) { env_ptrs.push_back(&envs[index]); } } - SerializePipeline(key, env_ptrs, pipeline_cache_filename); + VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); }); return pipeline; } @@ -902,8 +373,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if 
(!pipeline_cache_filename.empty()) { serialization_thread.QueueWork([this, key, env = std::move(env)] { - SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + VideoCommon::SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); }); } return pipeline; @@ -921,7 +392,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { - const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; + const auto name{fmt::format("{:016x}", key.unique_hash)}; spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; @@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Profile profile{base_profile}; const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != u128{}}; + const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eec17d3fd..4e48b4956 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -42,7 +43,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - u128 unique_hash; + u64 unique_hash; u32 shared_memory_size; std::array workgroup_size; @@ -76,16 +77,12 @@ namespace Vulkan { class ComputePipeline; class Device; class DescriptorPool; -class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; class VKScheduler; class VKUpdateDescriptorQueue; -struct ShaderInfo { - u128 unique_hash{}; - 
size_t size_bytes{}; -}; +using VideoCommon::ShaderInfo; struct ShaderPools { void ReleaseContents() { @@ -99,17 +96,16 @@ struct ShaderPools { Shader::ObjectPool flow_block; }; -class PipelineCache final : public VideoCommon::ShaderCache { +class PipelineCache : public VideoCommon::ShaderCache { public: - explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache); - ~PipelineCache() override; + ~PipelineCache(); [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); @@ -119,10 +115,6 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: - bool RefreshStages(); - - const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); - std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -140,11 +132,6 @@ private: Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program); - Tegra::GPU& gpu; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - const Device& device; VKScheduler& scheduler; DescriptorPool& descriptor_pool; @@ -156,16 +143,13 @@ private: GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; - std::array shader_infos{}; - bool last_valid_shaders{}; - std::unordered_map> compute_cache; std::unordered_map> graphics_cache; ShaderPools main_pools; Shader::Profile base_profile; - std::string pipeline_cache_filename; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; Common::ThreadWorker 
serialization_thread; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7df169c85..fa6daeb3a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -149,7 +149,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, + pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, texture_cache), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp new file mode 100644 index 000000000..b8b8eace5 --- /dev/null +++ b/src/video_core/shader_cache.cpp @@ -0,0 +1,233 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include + +#include "common/assert.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" + +namespace VideoCommon { + +void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); + RemovePendingShaders(); +} + +void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { + std::lock_guard lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); +} + +void ShaderCache::SyncGuestHost() { + std::scoped_lock lock{invalidation_mutex}; + RemovePendingShaders(); +} + +ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_) + : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, + rasterizer{rasterizer_} {} + +bool ShaderCache::RefreshStages(std::array& unique_hashes) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_shaders_valid; + } + dirty[VideoCommon::Dirty::Shaders] = false; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { + unique_hashes[index] = 0; + continue; + } + const auto& shader_config{maxwell3d.regs.shader_config[index]}; + const auto program{static_cast(index)}; + const GPUVAddr shader_addr{base_addr + shader_config.offset}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU 
address for shader 0x{:016x}", shader_addr); + last_shaders_valid = false; + return false; + } + const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; + if (!shader_info) { + const u32 start_address{shader_config.offset}; + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + shader_info = MakeShaderInfo(env, *cpu_shader_addr); + } + shader_infos[index] = shader_info; + unique_hashes[index] = shader_info->unique_hash; + } + last_shaders_valid = true; + return true; +} + +const ShaderInfo* ShaderCache::ComputeShader() { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + qmd.program_start}; + const std::optional cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return nullptr; + } + if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) { + return shader; + } + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + return MakeShaderInfo(env, *cpu_shader_addr); +} + +ShaderInfo* ShaderCache::TryGet(VAddr addr) const { + std::scoped_lock lock{lookup_mutex}; + + const auto it = lookup_cache.find(addr); + if (it == lookup_cache.end()) { + return nullptr; + } + return it->second->data; +} + +void ShaderCache::Register(std::unique_ptr data, VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex, lookup_mutex}; + + const VAddr addr_end = addr + size; + Entry* const entry = NewEntry(addr, addr_end, data.get()); + + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + invalidation_cache[page].push_back(entry); + } + + storage.push_back(std::move(data)); + + rasterizer.UpdatePagesCachedCount(addr, size, 1); +} + +void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { + 
const VAddr addr_end = addr + size; + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + auto it = invalidation_cache.find(page); + if (it == invalidation_cache.end()) { + continue; + } + InvalidatePageEntries(it->second, addr, addr_end); + } +} + +void ShaderCache::RemovePendingShaders() { + if (marked_for_removal.empty()) { + return; + } + // Remove duplicates + std::ranges::sort(marked_for_removal); + marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), + marked_for_removal.end()); + + std::vector removed_shaders; + removed_shaders.reserve(marked_for_removal.size()); + + std::scoped_lock lock{lookup_mutex}; + + for (Entry* const entry : marked_for_removal) { + removed_shaders.push_back(entry->data); + + const auto it = lookup_cache.find(entry->addr_start); + ASSERT(it != lookup_cache.end()); + lookup_cache.erase(it); + } + marked_for_removal.clear(); + + if (!removed_shaders.empty()) { + RemoveShadersFromStorage(std::move(removed_shaders)); + } +} + +void ShaderCache::InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end) { + size_t index = 0; + while (index < entries.size()) { + Entry* const entry = entries[index]; + if (!entry->Overlaps(addr, addr_end)) { + ++index; + continue; + } + + UnmarkMemory(entry); + RemoveEntryFromInvalidationCache(entry); + marked_for_removal.push_back(entry); + } +} + +void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) { + const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { + const auto entries_it = invalidation_cache.find(page); + ASSERT(entries_it != invalidation_cache.end()); + std::vector& entries = entries_it->second; + + const auto entry_it = std::ranges::find(entries, entry); + ASSERT(entry_it != entries.end()); + entries.erase(entry_it); + } +} + +void 
ShaderCache::UnmarkMemory(Entry* entry) { + if (!entry->is_memory_marked) { + return; + } + entry->is_memory_marked = false; + + const VAddr addr = entry->addr_start; + const size_t size = entry->addr_end - addr; + rasterizer.UpdatePagesCachedCount(addr, size, -1); +} + +void ShaderCache::RemoveShadersFromStorage(std::vector removed_shaders) { + // Remove them from the cache + std::erase_if(storage, [&removed_shaders](const std::unique_ptr& shader) { + return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end(); + }); +} + +ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) { + auto entry = std::make_unique(Entry{addr, addr_end, data}); + Entry* const entry_pointer = entry.get(); + + lookup_cache.emplace(addr, std::move(entry)); + return entry_pointer; +} + +const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { + auto info = std::make_unique(); + if (const std::optional cached_hash{env.Analyze()}) { + info->unique_hash = *cached_hash; + info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + Shader::ObjectPool flow_block; + Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 015a789d6..89a4bcc84 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -4,20 +4,28 @@ #pragma once -#include #include #include #include #include #include -#include "common/assert.h" #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +namespace Tegra { +class MemoryManager; +} 
+ namespace VideoCommon { -template +class GenericEnvironment; + +struct ShaderInfo { + u64 unique_hash{}; + size_t size_bytes{}; +}; + class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; @@ -25,206 +33,100 @@ class ShaderCache { struct Entry { VAddr addr_start; VAddr addr_end; - T* data; + ShaderInfo* data; bool is_memory_marked = true; - constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { + bool Overlaps(VAddr start, VAddr end) const noexcept { return start < addr_end && addr_start < end; } }; public: - virtual ~ShaderCache() = default; - /// @brief Removes shaders inside a given region /// @note Checks for ranges /// @param addr Start address of the invalidation /// @param size Number of bytes of the invalidation - void InvalidateRegion(VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - RemovePendingShaders(); - } + void InvalidateRegion(VAddr addr, size_t size); /// @brief Unmarks a memory region as cached and marks it for removal /// @param addr Start address of the CPU write operation /// @param size Number of bytes of the CPU write operation - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - } + void OnCPUWrite(VAddr addr, size_t size); /// @brief Flushes delayed removal operations - void SyncGuestHost() { - std::scoped_lock lock{invalidation_mutex}; - RemovePendingShaders(); - } + void SyncGuestHost(); + +protected: + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_); + + /// @brief Update the hashes and information of shader stages + /// @param unique_hashes Shader hashes to store into when a stage is enabled + /// @return True no success, false on error + bool RefreshStages(std::array& 
unique_hashes); + + /// @brief Returns information about the current compute shader + /// @return Pointer to a valid shader, nullptr on error + const ShaderInfo* ComputeShader(); + + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + std::array shader_infos{}; + bool last_shaders_valid = false; + +private: /// @brief Tries to obtain a cached shader starting in a given address /// @note Doesn't check for ranges, the given address has to be the start of the shader /// @param addr Start address of the shader, this doesn't cache for region /// @return Pointer to a valid shader, nullptr when nothing is found - T* TryGet(VAddr addr) const { - std::scoped_lock lock{lookup_mutex}; - - const auto it = lookup_cache.find(addr); - if (it == lookup_cache.end()) { - return nullptr; - } - return it->second->data; - } - -protected: - explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} + ShaderInfo* TryGet(VAddr addr) const; /// @brief Register in the cache a given entry /// @param data Shader to store in the cache /// @param addr Start address of the shader that will be registered /// @param size Size in bytes of the shader - void Register(std::unique_ptr data, VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex, lookup_mutex}; - - const VAddr addr_end = addr + size; - Entry* const entry = NewEntry(addr, addr_end, data.get()); - - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - invalidation_cache[page].push_back(entry); - } - - storage.push_back(std::move(data)); + void Register(std::unique_ptr data, VAddr addr, size_t size); - rasterizer.UpdatePagesCachedCount(addr, size, 1); - } - - /// @brief Called when a shader is going to be removed - /// @param shader Shader that will be removed - /// @pre invalidation_cache is locked - /// @pre lookup_mutex is locked - 
virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} - -private: /// @brief Invalidate pages in a given region /// @pre invalidation_mutex is locked - void InvalidatePagesInRegion(VAddr addr, std::size_t size) { - const VAddr addr_end = addr + size; - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - auto it = invalidation_cache.find(page); - if (it == invalidation_cache.end()) { - continue; - } - InvalidatePageEntries(it->second, addr, addr_end); - } - } + void InvalidatePagesInRegion(VAddr addr, size_t size); /// @brief Remove shaders marked for deletion /// @pre invalidation_mutex is locked - void RemovePendingShaders() { - if (marked_for_removal.empty()) { - return; - } - // Remove duplicates - std::sort(marked_for_removal.begin(), marked_for_removal.end()); - marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), - marked_for_removal.end()); - - std::vector removed_shaders; - removed_shaders.reserve(marked_for_removal.size()); - - std::scoped_lock lock{lookup_mutex}; - - for (Entry* const entry : marked_for_removal) { - removed_shaders.push_back(entry->data); - - const auto it = lookup_cache.find(entry->addr_start); - ASSERT(it != lookup_cache.end()); - lookup_cache.erase(it); - } - marked_for_removal.clear(); - - if (!removed_shaders.empty()) { - RemoveShadersFromStorage(std::move(removed_shaders)); - } - } + void RemovePendingShaders(); /// @brief Invalidates entries in a given range for the passed page /// @param entries Vector of entries in the page, it will be modified on overlaps /// @param addr Start address of the invalidation /// @param addr_end Non-inclusive end address of the invalidation /// @pre invalidation_mutex is locked - void InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end) { - std::size_t index = 0; - while (index < entries.size()) { - Entry* const entry = entries[index]; - if 
(!entry->Overlaps(addr, addr_end)) { - ++index; - continue; - } - - UnmarkMemory(entry); - RemoveEntryFromInvalidationCache(entry); - marked_for_removal.push_back(entry); - } - } + void InvalidatePageEntries(std::vector& entries, VAddr addr, VAddr addr_end); /// @brief Removes all references to an entry in the invalidation cache /// @param entry Entry to remove from the invalidation cache /// @pre invalidation_mutex is locked - void RemoveEntryFromInvalidationCache(const Entry* entry) { - const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { - const auto entries_it = invalidation_cache.find(page); - ASSERT(entries_it != invalidation_cache.end()); - std::vector& entries = entries_it->second; - - const auto entry_it = std::find(entries.begin(), entries.end(), entry); - ASSERT(entry_it != entries.end()); - entries.erase(entry_it); - } - } + void RemoveEntryFromInvalidationCache(const Entry* entry); /// @brief Unmarks an entry from the rasterizer cache /// @param entry Entry to unmark from memory - void UnmarkMemory(Entry* entry) { - if (!entry->is_memory_marked) { - return; - } - entry->is_memory_marked = false; - - const VAddr addr = entry->addr_start; - const std::size_t size = entry->addr_end - addr; - rasterizer.UpdatePagesCachedCount(addr, size, -1); - } + void UnmarkMemory(Entry* entry); /// @brief Removes a vector of shaders from a list /// @param removed_shaders Shaders to be removed from the storage /// @pre invalidation_mutex is locked /// @pre lookup_mutex is locked - void RemoveShadersFromStorage(std::vector removed_shaders) { - // Notify removals - for (T* const shader : removed_shaders) { - OnShaderRemoval(shader); - } - - // Remove them from the cache - const auto is_removed = [&removed_shaders](const std::unique_ptr& shader) { - return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != - removed_shaders.end(); - }; - 
std::erase_if(storage, is_removed); - } + void RemoveShadersFromStorage(std::vector removed_shaders); /// @brief Creates a new entry in the lookup cache and returns its pointer /// @pre lookup_mutex is locked - Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { - auto entry = std::make_unique(Entry{addr, addr_end, data}); - Entry* const entry_pointer = entry.get(); + Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data); - lookup_cache.emplace(addr, std::move(entry)); - return entry_pointer; - } + /// @brief Create a new shader entry and register it + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); VideoCore::RasterizerInterface& rasterizer; @@ -233,7 +135,7 @@ private: std::unordered_map> lookup_cache; std::unordered_map> invalidation_cache; - std::vector> storage; + std::vector> storage; std::vector marked_for_removal; }; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp new file mode 100644 index 000000000..5dccc0097 --- /dev/null +++ b/src/video_core/shader_environment.cpp @@ -0,0 +1,453 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/cityhash.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "common/fs/fs.h" +#include "common/logging/log.h" +#include "shader_recompiler/environment.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_environment.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; +constexpr u32 CACHE_VERSION = 3; + +constexpr size_t INST_SIZE = sizeof(u64); + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +static u64 MakeCbufKey(u32 index, u32 offset) { + return (static_cast(index) << 32) | offset; +} + +static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { + switch (entry.texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return Shader::TextureType::Color1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return Shader::TextureType::Color2D; + case Tegra::Texture::TextureType::Texture3D: + return Shader::TextureType::Color3D; + case Tegra::Texture::TextureType::TextureCubemap: + return Shader::TextureType::ColorCube; + case Tegra::Texture::TextureType::Texture1DArray: + return Shader::TextureType::ColorArray1D; + case Tegra::Texture::TextureType::Texture2DArray: + return Shader::TextureType::ColorArray2D; + case Tegra::Texture::TextureType::Texture1DBuffer: + return Shader::TextureType::Buffer; + case Tegra::Texture::TextureType::TextureCubeArray: + return Shader::TextureType::ColorArrayCube; + default: + throw Shader::NotImplementedException("Unknown texture type"); + } +} + +GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; +} + +GenericEnvironment::~GenericEnvironment() = 
default; + +u32 GenericEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +u32 GenericEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 GenericEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +std::array GenericEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +u64 GenericEnvironment::ReadInstruction(u32 address) { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[(address - cached_lowest) / INST_SIZE]; + } + has_unbound_instructions = true; + return gpu_memory->Read(program_base + address); +} + +std::optional GenericEnvironment::Analyze() { + const std::optional size{TryFindSize()}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast(*size); + return Common::CityHash64(reinterpret_cast(code.data()), *size); +} + +void GenericEnvironment::SetCachedSize(size_t size_bytes) { + cached_lowest = start_address; + cached_highest = start_address + static_cast(size_bytes); + code.resize(CachedSize()); + gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); +} + +size_t GenericEnvironment::CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; +} + +size_t GenericEnvironment::ReadSize() const noexcept { + return read_highest - read_lowest + INST_SIZE; +} + +bool GenericEnvironment::CanBeSerialized() const noexcept { + return !has_unbound_instructions; +} + +u64 GenericEnvironment::CalculateHash() const { + const size_t size{ReadSize()}; + const auto data{std::make_unique(size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash64(data.get(), size); +} + +void GenericEnvironment::Serialize(std::ofstream& file) const { + const u64 code_size{static_cast(CachedSize())}; + const u64 
num_texture_types{static_cast(texture_types.size())}; + const u64 num_cbuf_values{static_cast(cbuf_values.size())}; + + file.write(reinterpret_cast(&code_size), sizeof(code_size)) + .write(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) + .write(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) + .write(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast(&start_address), sizeof(start_address)) + .write(reinterpret_cast(&cached_lowest), sizeof(cached_lowest)) + .write(reinterpret_cast(&cached_highest), sizeof(cached_highest)) + .write(reinterpret_cast(&stage), sizeof(stage)) + .write(reinterpret_cast(code.data()), code_size); + for (const auto [key, type] : texture_types) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast(&key), sizeof(key)) + .write(reinterpret_cast(&type), sizeof(type)); + } + if (stage == Shader::Stage::Compute) { + file.write(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.write(reinterpret_cast(&sph), sizeof(sph)); + } +} + +std::optional GenericEnvironment::TryFindSize() { + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + GPUVAddr guest_addr{program_base + start_address}; + size_t offset{0}; + size_t size{BLOCK_SIZE}; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { + const u64 inst = data[index / INST_SIZE]; + if 
(inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + index; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; +} + +Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, + bool via_header_index, u32 raw) { + const TextureHandle handle{raw, via_header_index}; + const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + const Shader::TextureType result{ConvertType(entry)}; + texture_types.emplace(raw, result); + return result; +} + +GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Maxwell::ShaderProgram program, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + stage_index = 0; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + stage_index = 0; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + stage_index = 1; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + stage_index = 2; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + stage_index = 3; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + stage_index = 4; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + break; + } + const u64 local_size{sph.LocalMemorySize()}; + ASSERT(local_size <= std::numeric_limits::max()); + local_memory_size = static_cast(local_size); + texture_bound = maxwell3d->regs.tex_cb_index; +} + 
+u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); +} + +ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { + const auto& qmd{kepler_compute->launch_description}; + stage = Shader::Stage::Compute; + local_memory_size = qmd.local_pos_alloc; + texture_bound = kepler_compute->regs.tex_cb_index; + shared_memory_size = qmd.shared_alloc; + workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; +} + +u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); +} + +void 
FileEnvironment::Deserialize(std::ifstream& file) { + u64 code_size{}; + u64 num_texture_types{}; + u64 num_cbuf_values{}; + file.read(reinterpret_cast(&code_size), sizeof(code_size)) + .read(reinterpret_cast(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast(&num_cbuf_values), sizeof(num_cbuf_values)) + .read(reinterpret_cast(&local_memory_size), sizeof(local_memory_size)) + .read(reinterpret_cast(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast(&start_address), sizeof(start_address)) + .read(reinterpret_cast(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast(&stage), sizeof(stage)); + code = std::make_unique(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast(code.get()), code_size); + for (size_t i = 0; i < num_texture_types; ++i) { + u32 key; + Shader::TextureType type; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&type), sizeof(type)); + texture_types.emplace(key, type); + } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast(&key), sizeof(key)) + .read(reinterpret_cast(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast(&workgroup_size), sizeof(workgroup_size)) + .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.read(reinterpret_cast(&sph), sizeof(sph)); + } +} + +u64 FileEnvironment::ReadInstruction(u32 address) { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; +} + +u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + 
return it->second; +} + +Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { + const auto it{texture_types.find(handle)}; + if (it == texture_types.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +u32 FileEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 FileEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +u32 FileEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +std::array FileEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +void SerializePipeline(std::span key, std::span envs, + const std::filesystem::path& filename) try { + std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); + file.exceptions(std::ifstream::failbit); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + return; + } + if (file.tellp() == 0) { + // Write header + file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) + .write(reinterpret_cast(&CACHE_VERSION), sizeof(CACHE_VERSION)); + } + if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast(envs.size())}; + file.write(reinterpret_cast(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, + Common::UniqueFunction load_compute, + Common::UniqueFunction> load_graphics) try { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + 
return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + + std::array magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast(&cache_version), sizeof(cache_version)); + if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + file.close(); + if (Common::FS::RemoveFile(filename)) { + if (magic_number != MAGIC_NUMBER) { + LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); + } + if (cache_version != CACHE_VERSION) { + LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); + } + } else { + LOG_ERROR(Common_Filesystem, + "Invalid pipeline cache file and failed to delete it in \"{}\"", + Common::FS::PathToUTF8String(filename)); + } + return; + } + while (file.tellg() != end) { + if (stop_loading.stop_requested()) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); + std::vector envs(num_envs); + for (FileEnvironment& env : envs) { + env.Deserialize(file); + } + if (envs.front().ShaderStage() == Shader::Stage::Compute) { + load_compute(file, std::move(envs.front())); + } else { + load_graphics(file, std::move(envs)); + } + } + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h new file mode 100644 index 000000000..37d712045 --- /dev/null +++ b/src/video_core/shader_environment.h @@ -0,0 +1,198 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "common/unique_function.h" +#include "shader_recompiler/environment.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/textures/texture.h" + +namespace Tegra { +class Memorymanager; +} + +namespace VideoCommon { + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + if (via_header_index) { + image = data; + sampler = data; + } else { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + } + + u32 image; + u32 sampler; +}; + +class GenericEnvironment : public Shader::Environment { +public: + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~GenericEnvironment() override; + + [[nodiscard]] u32 TextureBoundBuffer() const final; + + [[nodiscard]] u32 LocalMemorySize() const final; + + [[nodiscard]] u32 SharedMemorySize() const final; + + [[nodiscard]] std::array WorkgroupSize() const final; + + [[nodiscard]] u64 ReadInstruction(u32 address) final; + + [[nodiscard]] std::optional Analyze(); + + void SetCachedSize(size_t size_bytes); + + [[nodiscard]] size_t CachedSize() const noexcept; + + [[nodiscard]] size_t ReadSize() const noexcept; + + [[nodiscard]] bool CanBeSerialized() const noexcept; + + [[nodiscard]] u64 CalculateHash() const; + + void Serialize(std::ofstream& file) const; + +protected: + std::optional TryFindSize(); + + Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, + u32 raw); + + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector code; + std::unordered_map texture_types; + std::unordered_map cbuf_values; + + u32 
local_memory_size{}; + u32 texture_bound{}; + u32 shared_memory_size{}; + std::array workgroup_size{}; + + u32 read_lowest = std::numeric_limits::max(); + u32 read_highest = 0; + + u32 cached_lowest = std::numeric_limits::max(); + u32 cached_highest = 0; + + bool has_unbound_instructions = false; +}; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D::Regs::ShaderProgram program, + GPUVAddr program_base_, u32 start_address_); + + ~GraphicsEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; + size_t stage_index{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~ComputeEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; + +class FileEnvironment final : public Shader::Environment { +public: + FileEnvironment() = default; + ~FileEnvironment() override = default; + + FileEnvironment& operator=(FileEnvironment&&) noexcept = default; + FileEnvironment(FileEnvironment&&) noexcept = default; + + FileEnvironment& operator=(const FileEnvironment&) = delete; + FileEnvironment(const FileEnvironment&) = delete; + + void Deserialize(std::ifstream& file); + + [[nodiscard]] u64 ReadInstruction(u32 address) override; + + [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + [[nodiscard]] 
Shader::TextureType ReadTextureType(u32 handle) override; + + [[nodiscard]] u32 LocalMemorySize() const override; + + [[nodiscard]] u32 SharedMemorySize() const override; + + [[nodiscard]] u32 TextureBoundBuffer() const override; + + [[nodiscard]] std::array WorkgroupSize() const override; + +private: + std::unique_ptr code; + std::unordered_map texture_types; + std::unordered_map cbuf_values; + std::array workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; + +void SerializePipeline(std::span key, std::span envs, + const std::filesystem::path& filename); + +template +void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) { + static_assert(std::is_trivially_copyable_v); + static_assert(std::has_unique_object_representations_v); + SerializePipeline(std::span(reinterpret_cast(&key), sizeof(key)), + std::span(envs.data(), envs.size()), filename); +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, + Common::UniqueFunction load_compute, + Common::UniqueFunction> load_graphics); + +} // namespace VideoCommon -- cgit v1.2.3 From d621e96d0de212cc16897eadf71e8a1b2e1eb5dc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 04:28:34 -0300 Subject: shader: Initial OpenGL implementation --- src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4 + src/shader_recompiler/frontend/ir/ir_emitter.h | 1 + .../translate/impl/move_special_register.cpp | 7 + src/video_core/CMakeLists.txt | 4 + src/video_core/buffer_cache/buffer_cache.h | 53 ++-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 37 ++- src/video_core/renderer_opengl/gl_buffer_cache.h | 40 ++- .../renderer_opengl/gl_compute_program.cpp | 178 +++++++++++++ .../renderer_opengl/gl_compute_program.h | 83 ++++++ src/video_core/renderer_opengl/gl_device.cpp | 89 ------- src/video_core/renderer_opengl/gl_device.h | 16 -- 
.../renderer_opengl/gl_graphics_program.cpp | 296 +++++++++++++++++++++ .../renderer_opengl/gl_graphics_program.h | 105 ++++++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 23 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 275 ++++++++++++++++++- src/video_core/renderer_opengl/gl_shader_cache.h | 98 ++++--- .../renderer_opengl/gl_shader_manager.cpp | 146 ---------- src/video_core/renderer_opengl/gl_shader_manager.h | 73 +---- .../renderer_opengl/gl_texture_cache.cpp | 257 ++++++------------ src/video_core/renderer_opengl/gl_texture_cache.h | 29 +- src/video_core/renderer_opengl/maxwell_to_gl.h | 108 ++++++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 17 +- src/video_core/renderer_opengl/renderer_opengl.h | 5 +- src/video_core/renderer_opengl/util_shaders.cpp | 13 +- src/video_core/renderer_vulkan/pipeline_helper.h | 17 -- src/video_core/renderer_vulkan/vk_buffer_cache.h | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 22 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 22 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 23 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 11 - src/video_core/shader_cache.cpp | 17 ++ src/video_core/shader_cache.h | 23 +- src/video_core/shader_environment.cpp | 4 +- src/video_core/shader_environment.h | 16 -- src/video_core/texture_cache/formatter.cpp | 4 +- src/video_core/texture_cache/formatter.h | 3 +- src/video_core/textures/texture.h | 9 + src/video_core/vulkan_common/vulkan_device.cpp | 2 +- 38 files changed, 1427 insertions(+), 705 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_compute_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_compute_program.h create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.cpp create mode 100644 src/video_core/renderer_opengl/gl_graphics_program.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 
b3c9fe72a..5913fdeff 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp @@ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() { return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; } +Value IREmitter::LocalInvocationId() { + return Inst(Opcode::LocalInvocationId); +} + U32 IREmitter::LocalInvocationIdX() { return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; } diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4441c495d..a12919283 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h @@ -95,6 +95,7 @@ public: [[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdZ(); + [[nodiscard]] Value LocalInvocationId(); [[nodiscard]] U32 LocalInvocationIdX(); [[nodiscard]] U32 LocalInvocationIdY(); [[nodiscard]] U32 LocalInvocationIdZ(); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index b0baff74b..01fb6f5e5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -120,6 +120,13 @@ enum class SpecialRegister : u64 { case SpecialRegister::SR_INVOCATION_INFO: // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); return ir.Imm32(0x00ff'0000); + case SpecialRegister::SR_TID: { + const IR::Value tid{ir.LocalInvocationId()}; + return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, + IR::U32{ir.CompositeExtract(tid, 1)}, + ir.Imm32(16), ir.Imm32(8)), + IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); + } case SpecialRegister::SR_TID_X: return ir.LocalInvocationIdX(); case SpecialRegister::SR_TID_Y: diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 
6e0e4b8f5..b008c37c0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -67,10 +67,14 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h + renderer_opengl/gl_compute_program.cpp + renderer_opengl/gl_compute_program.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h + renderer_opengl/gl_graphics_program.cpp + renderer_opengl/gl_graphics_program.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 29746f61d..6c92e4c30 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -70,8 +70,8 @@ class BufferCache { P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; + static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -154,7 +154,7 @@ public: void UnbindGraphicsTextureBuffers(size_t stage); void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written); + PixelFormat format, bool is_written, bool is_image); void UnbindComputeStorageBuffers(); @@ -164,7 +164,7 @@ public: void UnbindComputeTextureBuffers(); void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, - bool is_written); + bool is_written, bool is_image); void FlushCachedWrites(); @@ -197,6 +197,7 @@ public: [[nodiscard]] bool 
IsRegionCpuModified(VAddr addr, size_t size); std::mutex mutex; + Runtime& runtime; private: template @@ -366,7 +367,6 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - Runtime& runtime; SlotVector slot_buffers; DelayedDestructionRing delayed_destruction_ring; @@ -394,8 +394,10 @@ private: std::array enabled_texture_buffers{}; std::array written_texture_buffers{}; + std::array image_texture_buffers{}; u32 enabled_compute_texture_buffers = 0; u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; std::array fast_bound_uniform_buffers{}; @@ -431,8 +433,8 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_) - : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); deletion_iterator = slot_buffers.end(); @@ -703,13 +705,18 @@ template void BufferCache

::UnbindGraphicsTextureBuffers(size_t stage) { enabled_texture_buffers[stage] = 0; written_texture_buffers[stage] = 0; + image_texture_buffers[stage] = 0; } template void BufferCache

::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, - u32 size, PixelFormat format, bool is_written) { + u32 size, PixelFormat format, bool is_written, + bool is_image) { enabled_texture_buffers[stage] |= 1U << tbo_index; written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; + } texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -717,6 +724,7 @@ template void BufferCache

::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; written_compute_storage_buffers = 0; + image_compute_texture_buffers = 0; } template @@ -737,13 +745,17 @@ template void BufferCache

::UnbindComputeTextureBuffers() { enabled_compute_texture_buffers = 0; written_compute_texture_buffers = 0; + image_compute_texture_buffers = 0; } template void BufferCache

::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, - PixelFormat format, bool is_written) { + PixelFormat format, bool is_written, bool is_image) { enabled_compute_texture_buffers |= 1U << tbo_index; written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + } compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); } @@ -1057,7 +1069,6 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { template void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { - u32 binding_index = 0; ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { const TextureBufferBinding& binding = texture_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1066,9 +1077,12 @@ void BufferCache

::BindHostGraphicsTextureBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_texture_buffers[stage] >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1139,7 +1153,6 @@ void BufferCache

::BindHostComputeStorageBuffers() { template void BufferCache

::BindHostComputeTextureBuffers() { - u32 binding_index = 0; ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { const TextureBufferBinding& binding = compute_texture_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; @@ -1148,9 +1161,12 @@ void BufferCache

::BindHostComputeTextureBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; - if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { - runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); - ++binding_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_compute_texture_buffers >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } } else { runtime.BindTextureBuffer(buffer, offset, size, format); } @@ -1339,11 +1355,10 @@ void BufferCache

::UpdateComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer Binding& binding = compute_storage_buffers[index]; - const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); - binding.buffer_id = buffer_id; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed if (((written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c4189fb60..2d0ef1307 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -2,14 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL { namespace { +using VideoCore::Surface::PixelFormat; + struct BindlessSSBO { GLuint64EXT address; GLsizei length; @@ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept { glMakeNamedBufferResidentNV(buffer.handle, access); } +GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { + const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { + return offset == view.offset && size == view.size && format == view.format; + })}; + if (it != views.end()) { + return it->texture.handle; + } + OGLTexture texture; + texture.Create(GL_TEXTURE_BUFFER); + const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; + glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); + views.push_back({ + .offset = offset, + 
.size = size, + .format = format, + .texture = std::move(texture), + }); + return views.back().texture.handle; +} + BufferCacheRuntime::BufferCacheRuntime(const Device& device_) : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, use_assembly_shaders{device.UseAssemblyShaders()}, @@ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, static_cast(size)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast(&ssbo)); } else { - const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; + const GLuint base_binding = graphics_base_storage_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), static_cast(offset), static_cast(size)); @@ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, static_cast(offset), static_cast(size)); } +void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + PixelFormat format) { + *texture_handles++ = buffer.View(offset, size, format); +} + +void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { + *image_handles++ = buffer.View(offset, size, format); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index ddcce5e97..4986c65fd 100644 --- 
a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -32,6 +32,8 @@ public: void MakeResident(GLenum access) noexcept; + [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); + [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { return address; } @@ -41,9 +43,17 @@ public: } private: + struct BufferView { + u32 offset; + u32 size; + VideoCore::Surface::PixelFormat format; + OGLTexture texture; + }; + GLuint64EXT address = 0; OGLBuffer buffer; GLenum current_residency_access = GL_NONE; + std::vector views; }; class BufferCacheRuntime { @@ -75,13 +85,19 @@ public: void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); + void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + + void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, + VideoCore::Surface::PixelFormat format); + void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { if (use_assembly_shaders) { const GLuint handle = fast_uniforms[stage][binding_index].handle; const GLsizeiptr gl_size = static_cast(size); glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); } else { - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, fast_uniforms[stage][binding_index].handle, 0, @@ -103,7 +119,7 @@ public: std::span BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { const auto [mapped_span, offset] = stream_buffer->Request(static_cast(size)); - const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; + const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; glBindBufferRange(GL_UNIFORM_BUFFER, binding, 
stream_buffer->Handle(), static_cast(offset), static_cast(size)); @@ -118,6 +134,19 @@ public: return has_fast_buffer_sub_data; } + void SetBaseUniformBindings(const std::array& bindings) { + graphics_base_uniform_bindings = bindings; + } + + void SetBaseStorageBindings(const std::array& bindings) { + graphics_base_storage_bindings = bindings; + } + + void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) { + texture_handles = texture_handles_; + image_handles = image_handles_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -133,6 +162,11 @@ private: u32 max_attributes = 0; + std::array graphics_base_uniform_bindings{}; + std::array graphics_base_storage_bindings{}; + GLuint* texture_handles = nullptr; + GLuint* image_handles = nullptr; + std::optional stream_buffer; std::array, @@ -155,8 +189,8 @@ struct BufferCacheParams { static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; - static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; static constexpr bool USE_MEMORY_MAPS = false; + static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; }; using BufferCache = VideoCommon::BufferCache; diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp new file mode 100644 index 000000000..d5ef65439 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -0,0 +1,178 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/cityhash.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 16; + +size_t ComputeProgramKey::Hash() const noexcept { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof *this)); +} + +bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; +} + +ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + kepler_compute{kepler_compute_}, + program_manager{program_manager_}, program{std::move(program_)}, info{info_} { + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers += desc.count; + } + u32 num_textures = num_texture_buffers; + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + ASSERT(num_textures <= MAX_TEXTURES); + + u32 num_images = num_image_buffers; + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + ASSERT(num_images <= MAX_IMAGES); +} + +void ComputeProgram::Configure() { + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, 
+ desc.is_written); + ++ssbo_index; + } + texture_cache.SynchronizeComputeDescriptors(); + + std::array image_view_ids; + boost::container::static_vector image_view_indices; + std::array samplers; + std::array textures; + std::array images; + GLsizei sampler_binding{}; + GLsizei texture_binding{}; + GLsizei image_binding{}; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + samplers[sampler_binding++] = 0; + } + } + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto 
handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers[sampler_binding++] = sampler->Handle(); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + buffer_cache.UnbindComputeTextureBuffers(); + size_t texbuf_index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texbuf_index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + + buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); + buffer_cache.BindHostComputeBuffers(); + + const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + texture_binding += num_texture_buffers; + image_binding += num_image_buffers; + + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + if (texture_binding != 0) { + ASSERT(texture_binding == 
sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h new file mode 100644 index 000000000..64a75d44d --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.h @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class KeplerCompute; +} + +namespace Shader { +struct Info; +} + +namespace OpenGL { + +class ProgramManager; + +struct ComputeProgramKey { + u64 unique_hash; + u32 shared_memory_size; + std::array workgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputeProgramKey&) const noexcept; + + bool operator!=(const ComputeProgramKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class ComputeProgram { +public: + explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, OGLProgram program_, + const Shader::Info& info_); + + void Configure(); + +private: + TextureCache& texture_cache; + 
BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::KeplerCompute& kepler_compute; + ProgramManager& program_manager; + + OGLProgram program; + Shader::Info info; + + u32 num_texture_buffers{}; + u32 num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3b00614e7..18bbc4c1f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -22,34 +22,11 @@ namespace OpenGL { namespace { -// One uniform block is reserved for emulation purposes -constexpr u32 ReservedUniformBlocks = 1; - -constexpr u32 NumStages = 5; - constexpr std::array LIMIT_UBOS = { GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, }; -constexpr std::array LIMIT_SSBOS = { - GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, - GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, - GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, -}; -constexpr std::array LIMIT_SAMPLERS = { - GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, - GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, - GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, - GL_MAX_TEXTURE_IMAGE_UNITS, - GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, -}; -constexpr std::array LIMIT_IMAGES = { - GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, - GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, - GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, -}; template T GetInteger(GLenum pname) { @@ -82,15 +59,6 @@ bool 
HasExtension(std::span extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -u32 Extract(u32& base, u32& num, u32 amount, std::optional limit = {}) { - ASSERT(num >= amount); - if (limit) { - amount = std::min(amount, GetInteger(*limit)); - } - num -= amount; - return std::exchange(base, base + amount); -} - std::array BuildMaxUniformBuffers() noexcept { std::array max; std::ranges::transform(LIMIT_UBOS, max.begin(), @@ -98,62 +66,6 @@ std::array BuildMaxUniformBuffers() noexcep return max; } -std::array BuildBaseBindings() noexcept { - std::array bindings; - - static constexpr std::array stage_swizzle{0, 1, 2, 3, 4}; - const u32 total_ubos = GetInteger(GL_MAX_UNIFORM_BUFFER_BINDINGS); - const u32 total_ssbos = GetInteger(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); - const u32 total_samplers = GetInteger(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); - - u32 num_ubos = total_ubos - ReservedUniformBlocks; - u32 num_ssbos = total_ssbos; - u32 num_samplers = total_samplers; - - u32 base_ubo = ReservedUniformBlocks; - u32 base_ssbo = 0; - u32 base_samplers = 0; - - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - bindings[stage] = { - Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), - Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), - Extract(base_samplers, num_samplers, total_samplers / NumStages, - LIMIT_SAMPLERS[stage])}; - } - - u32 num_images = GetInteger(GL_MAX_IMAGE_UNITS); - u32 base_images = 0; - - // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. - // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the - // fragment stage, and at least 1 for the rest of the stages. - // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. - - // Reserve at least 4 image bindings on the fragment stage. 
- bindings[4].image = - Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); - - // This is guaranteed to be at least 1. - const u32 total_extracted_images = num_images / (NumStages - 1); - - // Reserve the other image bindings. - for (std::size_t i = 0; i < NumStages; ++i) { - const std::size_t stage = stage_swizzle[i]; - if (stage == 4) { - continue; - } - bindings[stage].image = - Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); - } - - // Compute doesn't care about any of this. - bindings[5] = {0, 0, 0, 0}; - - return bindings; -} - bool IsASTCSupported() { static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; static constexpr std::array formats = { @@ -225,7 +137,6 @@ Device::Device() { } max_uniform_buffers = BuildMaxUniformBuffers(); - base_bindings = BuildBaseBindings(); uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2c2b13767..152a3acd3 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -12,13 +12,6 @@ namespace OpenGL { class Device { public: - struct BaseBindings { - u32 uniform_buffer{}; - u32 shader_storage_buffer{}; - u32 sampler{}; - u32 image{}; - }; - explicit Device(); explicit Device(std::nullptr_t); @@ -28,14 +21,6 @@ public: return max_uniform_buffers[static_cast(shader_type)]; } - const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { - return base_bindings[stage_index]; - } - - const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { - return GetBaseBindings(static_cast(shader_type)); - } - size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } @@ 
-134,7 +119,6 @@ private: std::string vendor_name; std::array max_uniform_buffers{}; - std::array base_bindings{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp new file mode 100644 index 000000000..fd0958719 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -0,0 +1,296 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "common/cityhash.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 8; + +size_t GraphicsProgramKey::Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); +} + +bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; +} + +GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + const std::array& infos) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, program{std::move(program_)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? 
*info : Shader::Info{}; }); + + u32 num_textures{}; + u32 num_images{}; + for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { + const auto& info{stage_infos[stage]}; + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + for (const auto& desc : info.constant_buffer_descriptors) { + base_uniform_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.storage_buffers_descriptors) { + base_storage_bindings[stage + 1] += desc.count; + } + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers[stage] += desc.count; + num_textures += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers[stage] += desc.count; + num_images += desc.count; + } + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); +} + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +void GraphicsProgram::Configure(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_view_index{}; + GLsizei sampler_binding{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); + buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) { + const Shader::Info& info{stage_infos[stage]}; + 
buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + samplers[sampler_binding++] = 0; + } + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : 
info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_binding++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_view_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } 
+ if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); + + const ImageId* views_it{image_view_ids.data()}; + GLsizei texture_binding = 0; + GLsizei image_binding = 0; + std::array textures; + std::array images; + const auto prepare_stage{[&](size_t stage) { + buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); + buffer_cache.BindHostStageBuffers(stage); + + texture_binding += num_texture_buffers[stage]; + image_binding += num_image_buffers[stage]; + + const auto& info{stage_infos[stage]}; + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, 
texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } + texture_cache.UpdateRenderTargets(false); + + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + program_manager.BindProgram(program.handle); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h new file mode 100644 index 000000000..5adf3f41e --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -0,0 +1,105 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace OpenGL { + +class ProgramManager; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +struct GraphicsProgramKey { + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array layouts; + std::array, Maxwell::NumTransformFeedbackBuffers> varyings; + }; + + std::array unique_hashes; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; + BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + std::array padding; + TransformFeedbackState xfb_state; + + size_t Hash() const noexcept; + + bool 
operator==(const GraphicsProgramKey&) const noexcept; + + bool operator!=(const GraphicsProgramKey& rhs) const noexcept { + return !operator==(rhs); + } + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsProgramKey); + } else { + return offsetof(GraphicsProgramKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsProgram { +public: + explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, const std::array& infos); + + void Configure(bool is_indexed); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + ProgramManager& program_manager; + StateTracker& state_tracker; + + OGLProgram program; + std::array stage_infos{}; + std::array base_uniform_bindings{}; + std::array base_storage_bindings{}; + std::array num_texture_buffers{}; + std::array num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dd1937863..e527b76ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, 
buffer_cache_runtime), - shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), + shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, + buffer_cache, program_manager, state_tracker), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} @@ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - // Setup shaders and their used resources. - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; - texture_cache.UpdateRenderTargets(false); - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindGraphicsPipeline(); + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + program->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); BeginTransformFeedback(primitive_mode); @@ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { num_instances, base_instance); } } - EndTransformFeedback(); ++num_queued_commands; @@ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute() { - UNREACHABLE_MSG("Not implemented"); + ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; + if (!program) { + return; + } + program->Configure(); + const auto& qmd{kepler_compute.launch_description}; + glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); + ++num_queued_commands; } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); // ASSERT_MSG(image_view->size.height == 
config.height, "Framebuffer height is different"); - screen_info.display_texture = image_view->Handle(ImageViewType::e2D); + screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); return true; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c3e490b40..c9ca1f005 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -16,6 +16,11 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/frontend/emu_window.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" @@ -25,17 +30,281 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" namespace OpenGL { +namespace { +// FIXME: Move this somewhere else +const Shader::Profile profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + 
.support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = true, + .support_viewport_mask = true, + .support_typeless_image_loads = true, + .support_demote_to_helper_invocation = false, + .warp_size_potentially_larger_than_guest = true, + .support_int64_atomics = false, + .lower_left_origin_mode = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + + .generic_input_types = {}, + .convert_depth_mode = false, + .force_early_z = false, + + .tess_primitive = {}, + .tess_spacing = {}, + .tess_clockwise = false, + + .input_topology = Shader::InputTopology::Triangles, + + .fixed_state_point_size = std::nullopt, + + .alpha_test_func = Shader::CompareFunction::Always, + .alpha_test_reference = 0.0f, + + .y_negate = false, + + .xfb_varyings = {}, +}; + +using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::GraphicsEnvironment; + +template +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); +} + +void AddShader(GLenum stage, GLuint program, std::span code) { + OGLShader shader; + shader.handle = glCreateShader(stage); + + glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + glAttachShader(program, shader.handle); + if (!Settings::values.renderer_debug) { + return; + } + GLint shader_status{}; + glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length 
== 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +void LinkProgram(GLuint program) { + glLinkProgram(program); + if (!Settings::values.renderer_debug) { + return; + } + GLint link_status{}; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); + + GLint log_length{}; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} + +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} +} // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, - emu_window{emu_window_}, gpu{gpu_}, device{device_} {} + emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ + state_tracker_} {} 
ShaderCache::~ShaderCache() = default; +GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { + if (!RefreshStages(graphics_key.unique_hashes)) { + return nullptr; + } + const auto& regs{maxwell3d.regs}; + graphics_key.raw = 0; + graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 + ? regs.draw.topology.Value() + : Maxwell::PrimitiveTopology{}); + graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); + graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); + graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); + + const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; + auto& program{pair->second}; + if (is_new) { + program = CreateGraphicsProgram(); + } + return program.get(); +} + +ComputeProgram* ShaderCache::CurrentComputeProgram() { + const VideoCommon::ShaderInfo* const shader{ComputeShader()}; + if (!shader) { + return nullptr; + } + const auto& qmd{kepler_compute.launch_description}; + const ComputeProgramKey key{ + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, + .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, + }; + const auto [pair, is_new]{compute_cache.try_emplace(key)}; + auto& pipeline{pair->second}; + if (!is_new) { + return pipeline.get(); + } + pipeline = CreateComputeProgram(key, shader); + return pipeline.get(); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram() { + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); + + main_pools.ReleaseContents(); + return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); +} + +std::unique_ptr ShaderCache::CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, std::span envs, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + size_t env_index{0}; + 
std::array programs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + } + std::array infos{}; + + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + + Shader::Backend::SPIRV::Bindings binding; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + UNIMPLEMENTED_IF(index == 0); + + Shader::IR::Program& program{programs[index]}; + const size_t stage_index{index - 1}; + infos[stage_index] = &program.info; + + const std::vector code{EmitSPIRV(profile, program, binding)}; + FILE* file = fopen("D:\\shader.spv", "wb"); + fwrite(code.data(), 4, code.size(), file); + fclose(file); + AddShader(Stage(stage_index), gl_program.handle, code); + } + LinkProgram(gl_program.handle); + + return std::make_unique(texture_cache, buffer_cache, gpu_memory, maxwell3d, + program_manager, state_tracker, std::move(gl_program), + infos); +} + +std::unique_ptr ShaderCache::CreateComputeProgram( + const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + env.SetCachedSize(shader->size_bytes); + + main_pools.ReleaseContents(); + return CreateComputeProgram(main_pools, key, env, true); +} + +std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + Shader::Environment& env, + bool build_in_parallel) { + LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); + + 
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; + Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + Shader::Backend::SPIRV::Bindings binding; + const std::vector code{EmitSPIRV(profile, program, binding)}; + OGLProgram gl_program; + gl_program.handle = glCreateProgram(); + AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); + LinkProgram(gl_program.handle); + return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, + program_manager, std::move(gl_program), program.info); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 96520e17c..b479d073a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,20 +5,18 @@ #pragma once #include -#include -#include -#include -#include -#include #include -#include -#include #include #include "common/common_types.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_compute_program.h" +#include "video_core/renderer_opengl/gl_graphics_program.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -32,64 +30,62 @@ class EmuWindow; namespace OpenGL { class Device; +class ProgramManager; class RasterizerOpenGL; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsProgramKey { - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - }; - - std::array unique_hashes; - std::array color_formats; - union { - u32 raw; - BitField<0, 1, u32> 
xfb_enabled; - BitField<1, 1, u32> early_z; - BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; - BitField<6, 2, u32> tessellation_primitive; - BitField<8, 2, u32> tessellation_spacing; - BitField<10, 1, u32> tessellation_clockwise; - }; - u32 padding; - TransformFeedbackState xfb_state; - - [[nodiscard]] size_t Size() const noexcept { - if (xfb_enabled != 0) { - return sizeof(GraphicsProgramKey); - } else { - return offsetof(GraphicsProgramKey, padding); - } +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); -class GraphicsProgram { -public: -private: + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; }; class ShaderCache : public VideoCommon::ShaderCache { public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, - Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); + Tegra::MemoryManager& gpu_memory_, const Device& device_, + TextureCache& texture_cache_, BufferCache& buffer_cache_, + ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); + [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); + + [[nodiscard]] ComputeProgram* CurrentComputeProgram(); + private: + std::unique_ptr CreateGraphicsProgram(); + + std::unique_ptr CreateGraphicsProgram( + ShaderPools& pools, const GraphicsProgramKey& key, + std::span envs, bool build_in_parallel); + + std::unique_ptr CreateComputeProgram(const ComputeProgramKey& key, + const VideoCommon::ShaderInfo* shader); + + std::unique_ptr CreateComputeProgram(ShaderPools& pools, + const ComputeProgramKey& key, + 
Shader::Environment& env, + bool build_in_parallel); + Core::Frontend::EmuWindow& emu_window; - Tegra::GPU& gpu; const Device& device; + TextureCache& texture_cache; + BufferCache& buffer_cache; + ProgramManager& program_manager; + StateTracker& state_tracker; + + GraphicsProgramKey graphics_key{}; + + ShaderPools main_pools; + std::unordered_map> graphics_cache; + std::unordered_map> compute_cache; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 553e6e8d6..399959afb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -1,149 +1,3 @@ // Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. - -#include - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" - -namespace OpenGL { - -namespace { - -void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { - if (current == old) { - return; - } - if (current == 0) { - if (enabled) { - enabled = false; - glDisable(stage); - } - return; - } - if (!enabled) { - enabled = true; - glEnable(stage); - } - glBindProgramARB(stage, current); -} - -} // Anonymous namespace - -ProgramManager::ProgramManager(const Device& device) - : use_assembly_programs{device.UseAssemblyShaders()} { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - } else { - graphics_pipeline.Create(); - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -ProgramManager::~ProgramManager() = default; - -void ProgramManager::BindCompute(GLuint program) { - if (use_assembly_programs) { - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); - } else { - is_graphics_bound = false; - glUseProgram(program); - } -} - -void 
ProgramManager::BindGraphicsPipeline() { - if (!use_assembly_programs) { - UpdateSourcePrograms(); - } -} - -void ProgramManager::BindHostPipeline(GLuint pipeline) { - if (use_assembly_programs) { - if (geometry_enabled) { - geometry_enabled = false; - old_state.geometry = 0; - glDisable(GL_GEOMETRY_PROGRAM_NV); - } - } else { - if (!is_graphics_bound) { - glUseProgram(0); - } - } - glBindProgramPipeline(pipeline); -} - -void ProgramManager::RestoreGuestPipeline() { - if (use_assembly_programs) { - glBindProgramPipeline(0); - } else { - glBindProgramPipeline(graphics_pipeline.handle); - } -} - -void ProgramManager::BindHostCompute(GLuint program) { - if (use_assembly_programs) { - glDisable(GL_COMPUTE_PROGRAM_NV); - } - glUseProgram(program); - is_graphics_bound = false; -} - -void ProgramManager::RestoreGuestCompute() { - if (use_assembly_programs) { - glEnable(GL_COMPUTE_PROGRAM_NV); - glUseProgram(0); - } -} - -void ProgramManager::UseVertexShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); - } - current_state.vertex = program; -} - -void ProgramManager::UseGeometryShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); - } - current_state.geometry = program; -} - -void ProgramManager::UseFragmentShader(GLuint program) { - if (use_assembly_programs) { - BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); - } - current_state.fragment = program; -} - -void ProgramManager::UpdateSourcePrograms() { - if (!is_graphics_bound) { - is_graphics_bound = true; - glUseProgram(0); - } - - const GLuint handle = graphics_pipeline.handle; - const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { - if (current == old) { - return; - } - glUseProgramStages(handle, stage, current); - }; - update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, 
old_state.vertex); - update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); - update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); - - old_state = current_state; -} - -void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { - const auto& regs = maxwell.regs; - - // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. - y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index ad42cce74..70781d6f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,79 +4,24 @@ #pragma once -#include - #include -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/maxwell_to_gl.h" - namespace OpenGL { -class Device; - -/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -/// Not following that rule will cause problems on some AMD drivers. -struct alignas(16) MaxwellUniformData { - void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); - - GLfloat y_direction; -}; -static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); -static_assert(sizeof(MaxwellUniformData) < 16384, - "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); - class ProgramManager { public: - explicit ProgramManager(const Device& device); - ~ProgramManager(); - - /// Binds a compute program - void BindCompute(GLuint program); - - /// Updates bound programs. - void BindGraphicsPipeline(); - - /// Binds an OpenGL pipeline object unsynchronized with the guest state. 
- void BindHostPipeline(GLuint pipeline); + void BindProgram(GLuint program) { + if (bound_program == program) { + return; + } + bound_program = program; + glUseProgram(program); + } - /// Rewinds BindHostPipeline state changes. - void RestoreGuestPipeline(); - - /// Binds an OpenGL GLSL program object unsynchronized with the guest state. - void BindHostCompute(GLuint program); - - /// Rewinds BindHostCompute state changes. - void RestoreGuestCompute(); - - void UseVertexShader(GLuint program); - void UseGeometryShader(GLuint program); - void UseFragmentShader(GLuint program); + void RestoreGuestCompute() {} private: - struct PipelineState { - GLuint vertex = 0; - GLuint geometry = 0; - GLuint fragment = 0; - }; - - /// Update GLSL programs. - void UpdateSourcePrograms(); - - OGLPipeline graphics_pipeline; - - PipelineState current_state; - PipelineState old_state; - - bool use_assembly_programs = false; - - bool is_graphics_bound = true; - - bool vertex_enabled = false; - bool geometry_enabled = false; - bool fragment_enabled = false; + GLuint bound_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a8bf84218..7053be161 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -24,9 +24,7 @@ #include "video_core/textures/decoders.h" namespace OpenGL { - namespace { - using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; using Tegra::Texture::TextureType; @@ -59,107 +57,6 @@ struct CopyRegion { GLsizei depth; }; -struct FormatTuple { - GLenum internal_format; - GLenum format = GL_NONE; - GLenum type = GL_NONE; -}; - -constexpr std::array FORMAT_TABLE = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM - {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT - {GL_RGBA8UI, 
GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM - {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM - {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM - {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM - {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM - {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM - {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM - {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT - {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT - {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT - {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT - {GL_RG32I, GL_RG_INTEGER, 
GL_INT}, // R32G32_SINT - {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM - {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT - {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT - {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM - {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM - {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM - {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT - {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT - {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM - {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM - {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB - 
{GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB - {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB - {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB - {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM - {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, - GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT -}}; - constexpr std::array ACCELERATED_FORMATS{ GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, @@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{ GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, }; -const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); - return FORMAT_TABLE[static_cast(pixel_format)]; -} - GLenum ImageTarget(const VideoCommon::ImageInfo& info) { switch (info.type) { case ImageType::e1D: @@ -195,26 +87,24 @@ GLenum ImageTarget(const 
VideoCommon::ImageInfo& info) { return GL_NONE; } -GLenum ImageTarget(ImageViewType type, int num_samples = 1) { +GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { const bool is_multisampled = num_samples > 1; switch (type) { - case ImageViewType::e1D: + case Shader::TextureType::Color1D: return GL_TEXTURE_1D; - case ImageViewType::e2D: + case Shader::TextureType::Color2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; - case ImageViewType::Cube: + case Shader::TextureType::ColorCube: return GL_TEXTURE_CUBE_MAP; - case ImageViewType::e3D: + case Shader::TextureType::Color3D: return GL_TEXTURE_3D; - case ImageViewType::e1DArray: + case Shader::TextureType::ColorArray1D: return GL_TEXTURE_1D_ARRAY; - case ImageViewType::e2DArray: + case Shader::TextureType::ColorArray2D: return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; - case ImageViewType::CubeArray: + case Shader::TextureType::ColorArrayCube: return GL_TEXTURE_CUBE_MAP_ARRAY; - case ImageViewType::Rect: - return GL_TEXTURE_RECTANGLE; - case ImageViewType::Buffer: + case Shader::TextureType::Buffer: return GL_TEXTURE_BUFFER; } UNREACHABLE_MSG("Invalid image view type={}", type); @@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::arrayflags & VideoCommon::ImageViewFlagBits::Slice)) { - const GLuint texture = image_view->DefaultHandle(); - glNamedFramebufferTexture(fbo, attachment, texture, 0); + glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0); return; } - const GLuint texture = image_view->Handle(ImageViewType::e3D); + const GLuint texture = image_view->Handle(Shader::TextureType::Color3D); if (image_view->range.extent.layers > 1) { // TODO: OpenGL doesn't support rendering to a fixed number of slices glNamedFramebufferTexture(fbo, attachment, texture, 0); @@ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& static constexpr std::array 
TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; for (size_t i = 0; i < TARGETS.size(); ++i) { const GLenum target = TARGETS[i]; - for (const FormatTuple& tuple : FORMAT_TABLE) { + for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) { const GLenum format = tuple.internal_format; GLint compat_class; GLint compat_type; @@ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); null_image_3d.Create(GL_TEXTURE_3D); - null_image_rect.Create(GL_TEXTURE_RECTANGLE); glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); - glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); std::array new_handles; glGenTextures(static_cast(new_handles.size()), new_handles.data()); @@ -496,29 +383,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, GL_R8, 0, 1, 0, 6); const std::array texture_handles{ - null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, - null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, - null_image_view_2d_array.handle, null_image_view_cube.handle, + null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, + null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle, + null_image_view_cube.handle, }; for (const GLuint handle : texture_handles) { static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); } - const auto set_view = [this](ImageViewType type, GLuint handle) { + const auto set_view = [this](Shader::TextureType 
type, GLuint handle) { if (device.HasDebuggingToolAttached()) { const std::string name = fmt::format("NullImage {}", type); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } null_image_views[static_cast(type)] = handle; }; - set_view(ImageViewType::e1D, null_image_view_1d.handle); - set_view(ImageViewType::e2D, null_image_view_2d.handle); - set_view(ImageViewType::Cube, null_image_view_cube.handle); - set_view(ImageViewType::e3D, null_image_3d.handle); - set_view(ImageViewType::e1DArray, null_image_1d_array.handle); - set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); - set_view(ImageViewType::CubeArray, null_image_cube_array.handle); - set_view(ImageViewType::Rect, null_image_rect.handle); + set_view(Shader::TextureType::Color1D, null_image_view_1d.handle); + set_view(Shader::TextureType::Color2D, null_image_view_2d.handle); + set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle); + set_view(Shader::TextureType::Color3D, null_image_3d.handle); + set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); + set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); + set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); } TextureCacheRuntime::~TextureCacheRuntime() = default; @@ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; } else { - const auto& tuple = GetFormatTuple(info.format); + const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); gl_internal_format = tuple.internal_format; gl_format = tuple.format; gl_type = tuple.type; @@ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); break; case GL_TEXTURE_BUFFER: - buffer.Create(); - glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); + 
UNREACHABLE(); break; default: UNREACHABLE_MSG("Invalid target=0x{:x}", target); @@ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map, } } -void Image::UploadMemory(const ImageBufferMap& map, - std::span copies) { - for (const VideoCommon::BufferCopy& copy : copies) { - glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, - copy.dst_offset, copy.size); - } -} - void Image::DownloadMemory(ImageBufferMap& map, std::span copies) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API @@ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI if (True(image.flags & ImageFlagBits::Converted)) { internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; } else { - internal_format = GetFormatTuple(format).internal_format; + internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; } VideoCommon::SubresourceRange flatten_range = info.range; std::array handles; @@ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI [[fallthrough]]; case ImageViewType::e1D: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); break; case ImageViewType::e2DArray: flatten_range.extent.layers = 1; @@ -985,37 +862,65 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .extent = {.levels = 1, .layers = 1}, }; glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); - break; + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); + } else { + glGenTextures(2, 
handles.data()); + SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, + info.range); } - glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); break; case ImageViewType::e3D: glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); + SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); break; case ImageViewType::CubeArray: flatten_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: glGenTextures(2, handles.data()); - SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); - SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); + SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); + SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); break; case ImageViewType::Rect: - glGenTextures(1, handles.data()); - SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); + UNIMPLEMENTED(); break; case ImageViewType::Buffer: - glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); - SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); + UNREACHABLE(); + break; + } + switch (info.type) { + case ImageViewType::e1D: + default_handle = Handle(Shader::TextureType::Color1D); + break; + case ImageViewType::e1DArray: + default_handle = Handle(Shader::TextureType::ColorArray1D); + break; + case ImageViewType::e2D: + default_handle = Handle(Shader::TextureType::Color2D); + break; + case ImageViewType::e2DArray: + default_handle = Handle(Shader::TextureType::ColorArray2D); + break; + case ImageViewType::e3D: + default_handle = 
Handle(Shader::TextureType::Color3D); + break; + case ImageViewType::Cube: + default_handle = Handle(Shader::TextureType::ColorCube); + break; + case ImageViewType::CubeArray: + default_handle = Handle(Shader::TextureType::ColorArrayCube); + break; + default: break; } - default_handle = Handle(info.type); } +ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, + const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) + : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, + buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} + ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info) : VideoCommon::ImageViewBase{info, view_info} {} @@ -1023,24 +928,18 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} -void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, +void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range) { - if (info.type == ImageViewType::Buffer) { - // TODO: Take offset from buffer cache - glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, - image.guest_size_bytes); - } else { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, - view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if (!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); - } + const GLuint parent = image.texture.handle; + const GLenum target = ImageTarget(view_type, image.info.num_samples); + 
glTextureView(handle, target, parent, internal_format, view_range.base.level, + view_range.extent.levels, view_range.base.layer, view_range.extent.layers); + if (!info.IsRenderTarget()) { + ApplySwizzle(handle, format, info.Swizzle()); } if (device.HasDebuggingToolAttached()) { - const std::string name = VideoCommon::Name(*this, view_type); + const std::string name = VideoCommon::Name(*this); glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); } stored_views.emplace_back().handle = handle; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 817b0e650..2e3e02b79 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -9,6 +9,7 @@ #include +#include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/texture_cache.h" @@ -127,13 +128,12 @@ private: OGLTexture null_image_1d_array; OGLTexture null_image_cube_array; OGLTexture null_image_3d; - OGLTexture null_image_rect; OGLTextureView null_image_view_1d; OGLTextureView null_image_view_2d; OGLTextureView null_image_view_2d_array; OGLTextureView null_image_view_cube; - std::array null_image_views; + std::array null_image_views{}; }; class Image : public VideoCommon::ImageBase { @@ -154,8 +154,6 @@ public: void UploadMemory(const ImageBufferMap& map, std::span copies); - void UploadMemory(const ImageBufferMap& map, std::span copies); - void DownloadMemory(ImageBufferMap& map, std::span copies); GLuint StorageHandle() noexcept; @@ -170,7 +168,6 @@ private: void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); OGLTexture texture; - OGLBuffer buffer; OGLTextureView store_view; GLenum gl_internal_format = GL_NONE; GLenum gl_format = GL_NONE; @@ -182,12 +179,14 @@ class ImageView : public 
VideoCommon::ImageViewBase { public: explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); + explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, + const VideoCommon::ImageViewInfo&, GPUVAddr); explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); - [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { - return views[static_cast(query_type)]; + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { + return views[static_cast(handle_type)]; } [[nodiscard]] GLuint DefaultHandle() const noexcept { @@ -198,15 +197,25 @@ public: return internal_format; } + [[nodiscard]] GPUVAddr GpuAddr() const noexcept { + return gpu_addr; + } + + [[nodiscard]] u32 BufferSize() const noexcept { + return buffer_size; + } + private: - void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, + void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, const VideoCommon::ImageViewInfo& info, VideoCommon::SubresourceRange view_range); - std::array views{}; + std::array views{}; std::vector stored_views; - GLuint default_handle = 0; GLenum internal_format = GL_NONE; + GLuint default_handle = 0; + GPUVAddr gpu_addr = 0; + u32 buffer_size = 0; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index f7ad8f370..672f94bfc 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -5,12 +5,120 @@ #pragma once #include + #include "video_core/engines/maxwell_3d.h" +#include "video_core/surface.h" namespace OpenGL::MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +struct FormatTuple { + GLenum 
internal_format; + GLenum format = GL_NONE; + GLenum type = GL_NONE; +}; + +constexpr std::array FORMAT_TABLE = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM + {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM + {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM + {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM + {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM + {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM + {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM + {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM + {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT + 
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT + {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT + {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT + {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT + {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM + {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM + {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT + {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB + {GL_RGBA4, GL_RGBA, 
GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, + GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT +}}; + +inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) { + ASSERT(static_cast(pixel_format) < FORMAT_TABLE.size()); + return FORMAT_TABLE[static_cast(pixel_format)]; +} + inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { case 
Maxwell::VertexAttribute::Type::UnsignedNorm: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c12929de6..4e77ef808 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, std::unique_ptr context_) : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, - program_manager{device}, rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); @@ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() { OGLShader fragment_shader; fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - vertex_program.Create(true, false, vertex_shader.handle); - fragment_program.Create(true, false, fragment_shader.handle); - - pipeline.Create(); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); + present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); // Generate presentation sampler present_sampler.Create(); @@ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, - std::data(ortho_matrix)); + program_manager.BindProgram(present_program.handle); + glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = 
texcoords.left; @@ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { state_tracker.NotifyClipControl(); state_tracker.NotifyAlphaTest(); - program_manager.BindHostPipeline(pipeline.handle); - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); glEnable(GL_CULL_FACE); if (screen_info.display_srgb) { @@ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glClear(GL_COLOR_BUFFER_BIT); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - program_manager.RestoreGuestPipeline(); + // TODO + // program_manager.RestoreGuestPipeline(); } void RendererOpenGL::RenderScreenshot() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 0b66f8332..b3ee55665 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -12,7 +12,6 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" namespace Core { @@ -111,9 +110,7 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram vertex_program; - OGLProgram fragment_program; - OGLPipeline pipeline; + OGLProgram present_program; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8fb5be393..51e72b705 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -16,7 +16,6 @@ #include "video_core/host_shaders/opengl_copy_bc4_comp.h" #include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" -#include 
"video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" @@ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindHostCompute(astc_decoder_program.handle); + program_manager.BindProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); + program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two 
images are not implemented"); - program_manager.BindHostCompute(pitch_unswizzle_program.handle); + program_manager.BindProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index feaace0c5..168ffa7e9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -18,6 +18,9 @@ namespace Vulkan { +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; + ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* thread_worker, const Shader::Info& info_, @@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, secondary_offset}; const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - return TextureHandle{lhs_raw | rhs_raw, via_header_index}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); } }}; std::ranges::for_each(info.texture_buffer_descriptors, add_image); std::ranges::for_each(info.image_buffer_descriptors, add_image); for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { 
- const TextureHandle handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.image); + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); - Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); samplers.push_back(sampler->Handle()); } } @@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ImageId* texture_buffer_ids{image_view_ids.data()}; size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++texture_buffer_ids; ++index; } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9f5d30fe8..e5f54a84f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -19,7 +19,7 @@ #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/vulkan_common/vulkan_device.h" -#ifdef _MSC_VER +#if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] #else #define LAMBDA_FORCEINLINE @@ -30,6 +30,7 @@ namespace { using boost::container::small_vector; using boost::container::static_vector; using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormatFromDepthFormat; using VideoCore::Surface::PixelFormatFromRenderTargetFormat; @@ -289,15 +290,15 @@ void 
GraphicsPipeline::ConfigureImpl(bool is_indexed) { const u32 lhs_raw{gpu_memory.Read(addr)}; const u32 rhs_raw{gpu_memory.Read(separate_addr)}; const u32 raw{lhs_raw | rhs_raw}; - return TextureHandle{raw, via_header_index}; + return TexturePair(raw, via_header_index); } } - return TextureHandle{gpu_memory.Read(addr), via_header_index}; + return TexturePair(gpu_memory.Read(addr), via_header_index); }}; const auto add_image{[&](const auto& desc) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; } }}; if constexpr (Spec::has_texture_buffers) { @@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { - const TextureHandle handle{read_handle(desc, index)}; - image_view_indices[image_index++] = handle.image; + const auto handle{read_handle(desc, index)}; + image_view_indices[image_index++] = handle.first; - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; samplers[sampler_index++] = sampler->Handle(); } } @@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; for (u32 i = 0; i < desc.count; ++i) { bool is_written{false}; - if constexpr (std::is_same_v) { + if constexpr (is_image) { is_written = desc.is_written; } ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), image_view.BufferSize(), image_view.format, - is_written); + is_written, is_image); ++index; 
++texture_buffer_index; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1334882b5..30b71bdbc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -342,28 +342,15 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { - main_pools.ReleaseContents(); - - std::array graphics_envs; - boost::container::static_vector envs; + GraphicsEnvironments environments; + GetGraphicsEnvironments(environments, graphics_key.unique_hashes); - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == 0) { - continue; - } - const auto program{static_cast(index)}; - auto& env{graphics_envs[index]}; - const u32 start_address{maxwell3d.regs.shader_config[index].offset}; - env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; - env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; + main_pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; if (pipeline_cache_filename.empty()) { return pipeline; } - serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { + serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { boost::container::static_vector env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0f15ad2f7..ef14e91e7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ 
b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { return scissor; } -struct TextureHandle { - constexpr TextureHandle(u32 data, bool via_header_index) { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - - u32 image; - u32 sampler; -}; - DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, bool is_indexed) { DrawParams params{ diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index b8b8eace5..78bf90c48 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() { return MakeShaderInfo(env, *cpu_shader_addr); } +void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array& unique_hashes) { + size_t env_index{}; + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < NUM_PROGRAMS; ++index) { + if (unique_hashes[index] == 0) { + continue; + } + const auto program{static_cast(index)}; + auto& env{result.envs[index]}; + const u32 start_address{maxwell3d.regs.shader_config[index].offset}; + env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; + env.SetCachedSize(shader_infos[index]->size_bytes); + result.env_ptrs[env_index++] = &env; + } +} + ShaderInfo* ShaderCache::TryGet(VAddr addr) const { std::scoped_lock lock{lookup_mutex}; diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 89a4bcc84..136fe294c 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -4,14 +4,18 @@ #pragma once +#include +#include #include #include +#include #include #include #include #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +#include "video_core/shader_environment.h" namespace 
Tegra { class MemoryManager; @@ -30,6 +34,8 @@ class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; + static constexpr size_t NUM_PROGRAMS = 6; + struct Entry { VAddr addr_start; VAddr addr_end; @@ -58,6 +64,15 @@ public: void SyncGuestHost(); protected: + struct GraphicsEnvironments { + std::array envs; + std::array env_ptrs; + + std::span Span() const noexcept { + return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); + } + }; + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_); @@ -65,17 +80,21 @@ protected: /// @brief Update the hashes and information of shader stages /// @param unique_hashes Shader hashes to store into when a stage is enabled /// @return True no success, false on error - bool RefreshStages(std::array& unique_hashes); + bool RefreshStages(std::array& unique_hashes); /// @brief Returns information about the current compute shader /// @return Pointer to a valid shader, nullptr on error const ShaderInfo* ComputeShader(); + /// @brief Collect the current graphics environments + void GetGraphicsEnvironments(GraphicsEnvironments& result, + const std::array& unique_hashes); + Tegra::MemoryManager& gpu_memory; Tegra::Engines::Maxwell3D& maxwell3d; Tegra::Engines::KeplerCompute& kepler_compute; - std::array shader_infos{}; + std::array shader_infos{}; bool last_shaders_valid = false; private: diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 5dccc0097..c93174519 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -187,8 +187,8 @@ std::optional GenericEnvironment::TryFindSize() { Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, u32 raw) { - const TextureHandle handle{raw, 
via_header_index}; - const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; + const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; Tegra::Texture::TICEntry entry; gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); const Shader::TextureType result{ConvertType(entry)}; diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 37d712045..d26dbfaab 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -29,22 +29,6 @@ class Memorymanager; namespace VideoCommon { -struct TextureHandle { - explicit TextureHandle(u32 data, bool via_header_index) { - if (via_header_index) { - image = data; - sampler = data; - } else { - const Tegra::Texture::TextureHandle handle{data}; - image = handle.tic_id; - sampler = via_header_index ? image : handle.tsc_id.Value(); - } - } - - u32 image; - u32 sampler; -}; - class GenericEnvironment : public Shader::Environment { public: explicit GenericEnvironment() = default; diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index d10ba4ccd..249cc4d0f 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp @@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) { return "Invalid"; } -std::string Name(const ImageViewBase& image_view, std::optional type) { +std::string Name(const ImageViewBase& image_view) { const u32 width = image_view.size.width; const u32 height = image_view.size.height; const u32 depth = image_view.size.depth; @@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional t const u32 num_layers = image_view.range.extent.layers; const std::string level = num_levels > 1 ? 
fmt::format(":{}", num_levels) : ""; - switch (type.value_or(image_view.type)) { + switch (image_view.type) { case ImageViewType::e1D: return fmt::format("ImageView 1D {}{}", width, level); case ImageViewType::e2D: diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index a48413983..c6cf0583f 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -255,8 +255,7 @@ struct RenderTargets; [[nodiscard]] std::string Name(const ImageBase& image); -[[nodiscard]] std::string Name(const ImageViewBase& image_view, - std::optional type = std::nullopt); +[[nodiscard]] std::string Name(const ImageViewBase& image_view); [[nodiscard]] std::string Name(const RenderTargets& render_targets); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c1d14335e..1a9399455 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -154,6 +154,15 @@ union TextureHandle { }; static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); +[[nodiscard]] inline std::pair TexturePair(u32 raw, bool via_header_index) { + if (via_header_index) { + return {raw, raw}; + } else { + const Tegra::Texture::TextureHandle handle{raw}; + return {handle.tic_id, via_header_index ? 
handle.tic_id : handle.tsc_id}; + } +} + struct TICEntry { union { struct { diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2318c1bda..e27a2b51e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, .pNext = nullptr, - .storageBuffer16BitAccess = false, + .storageBuffer16BitAccess = true, .uniformAndStorageBuffer16BitAccess = true, .storagePushConstant16 = false, .storageInputOutput16 = false, -- cgit v1.2.3 From bed090807afd3364ed6ef18a031a0ffd95a1b89b Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 3 May 2021 20:53:00 -0300 Subject: Move SPIR-V emission functions to their own header --- src/shader_recompiler/CMakeLists.txt | 2 + src/shader_recompiler/backend/bindings.h | 19 + src/shader_recompiler/backend/spirv/emit_context.h | 9 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 1 + src/shader_recompiler/backend/spirv/emit_spirv.h | 571 +------------------- .../backend/spirv/emit_spirv_atomic.cpp | 1 + .../backend/spirv/emit_spirv_barriers.cpp | 1 + .../spirv/emit_spirv_bitwise_conversion.cpp | 1 + .../backend/spirv/emit_spirv_composite.cpp | 1 + .../backend/spirv/emit_spirv_context_get_set.cpp | 1 + .../backend/spirv/emit_spirv_control_flow.cpp | 1 + .../backend/spirv/emit_spirv_convert.cpp | 1 + .../backend/spirv/emit_spirv_floating_point.cpp | 1 + .../backend/spirv/emit_spirv_image.cpp | 1 + .../backend/spirv/emit_spirv_image_atomic.cpp | 1 + .../backend/spirv/emit_spirv_instructions.h | 583 +++++++++++++++++++++ .../backend/spirv/emit_spirv_integer.cpp | 1 + .../backend/spirv/emit_spirv_logical.cpp | 1 + .../backend/spirv/emit_spirv_memory.cpp | 1 + .../backend/spirv/emit_spirv_select.cpp | 1 + 
.../backend/spirv/emit_spirv_shared_memory.cpp | 1 + .../backend/spirv/emit_spirv_special.cpp | 1 + .../backend/spirv/emit_spirv_undefined.cpp | 1 + .../backend/spirv/emit_spirv_warp.cpp | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 5 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 8 +- 26 files changed, 637 insertions(+), 579 deletions(-) create mode 100644 src/shader_recompiler/backend/bindings.h create mode 100644 src/shader_recompiler/backend/spirv/emit_spirv_instructions.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 0bcd714d6..6523615aa 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(shader_recompiler STATIC + backend/bindings.h backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp @@ -13,6 +14,7 @@ add_library(shader_recompiler STATIC backend/spirv/emit_spirv_floating_point.cpp backend/spirv/emit_spirv_image.cpp backend/spirv/emit_spirv_image_atomic.cpp + backend/spirv/emit_spirv_instructions.h backend/spirv/emit_spirv_integer.cpp backend/spirv/emit_spirv_logical.cpp backend/spirv/emit_spirv_memory.cpp diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h new file mode 100644 index 000000000..35503000c --- /dev/null +++ b/src/shader_recompiler/backend/bindings.h @@ -0,0 +1,19 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_types.h" + +namespace Shader::Backend { + +struct Bindings { + u32 unified{}; + u32 uniform_buffer{}; + u32 storage_buffer{}; + u32 texture{}; + u32 image{}; +}; + +} // namespace Shader::Backend diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 30b08104d..8b000f1ec 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -9,6 +9,7 @@ #include +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" #include "shader_recompiler/shader_info.h" @@ -17,14 +18,6 @@ namespace Shader::Backend::SPIRV { using Sirit::Id; -struct Bindings { - u32 unified{}; - u32 uniform_buffer{}; - u32 storage_buffer{}; - u32 texture{}; - u32 image{}; -}; - class VectorTypes { public: void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3f9adc902..0681dfd16 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -9,6 +9,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/program.h" diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 47d62b190..d8ab2d8ed 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -4,9 +4,12 @@ #pragma once +#include + #include #include "common/common_types.h" +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/spirv/emit_context.h" #include 
"shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" @@ -16,569 +19,9 @@ namespace Shader::Backend::SPIRV { [[nodiscard]] std::vector EmitSPIRV(const Profile& profile, IR::Program& program, Bindings& binding); -// Microinstruction emitters -Id EmitPhi(EmitContext& ctx, IR::Inst* inst); -void EmitVoid(EmitContext& ctx); -Id EmitIdentity(EmitContext& ctx, const IR::Value& value); -void EmitBranch(EmitContext& ctx, Id label); -void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); -void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); -void EmitSelectionMerge(EmitContext& ctx, Id merge_label); -void EmitReturn(EmitContext& ctx); -void EmitJoin(EmitContext& ctx); -void EmitUnreachable(EmitContext& ctx); -void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); -void EmitBarrier(EmitContext& ctx); -void EmitWorkgroupMemoryBarrier(EmitContext& ctx); -void EmitDeviceMemoryBarrier(EmitContext& ctx); -void EmitPrologue(EmitContext& ctx); -void EmitEpilogue(EmitContext& ctx); -void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); -void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); -void EmitGetRegister(EmitContext& ctx); -void EmitSetRegister(EmitContext& ctx); -void EmitGetPred(EmitContext& ctx); -void EmitSetPred(EmitContext& ctx); -void EmitSetGotoVariable(EmitContext& ctx); -void EmitGetGotoVariable(EmitContext& ctx); -void EmitSetIndirectBranchVariable(EmitContext& ctx); -void EmitGetIndirectBranchVariable(EmitContext& ctx); -Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const 
IR::Value& offset); -Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); -Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); -void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); -Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); -void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); -void EmitSetSampleMask(EmitContext& ctx, Id value); -void EmitSetFragDepth(EmitContext& ctx, Id value); -void EmitGetZFlag(EmitContext& ctx); -void EmitGetSFlag(EmitContext& ctx); -void EmitGetCFlag(EmitContext& ctx); -void EmitGetOFlag(EmitContext& ctx); -void EmitSetZFlag(EmitContext& ctx); -void EmitSetSFlag(EmitContext& ctx); -void EmitSetCFlag(EmitContext& ctx); -void EmitSetOFlag(EmitContext& ctx); -Id EmitWorkgroupId(EmitContext& ctx); -Id EmitLocalInvocationId(EmitContext& ctx); -Id EmitInvocationId(EmitContext& ctx); -Id EmitSampleId(EmitContext& ctx); -Id EmitIsHelperInvocation(EmitContext& ctx); -Id EmitYDirection(EmitContext& ctx); -Id EmitLoadLocal(EmitContext& ctx, Id word_offset); -void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); -Id EmitUndefU1(EmitContext& ctx); -Id EmitUndefU8(EmitContext& ctx); -Id EmitUndefU16(EmitContext& ctx); -Id EmitUndefU32(EmitContext& ctx); -Id EmitUndefU64(EmitContext& ctx); -void EmitLoadGlobalU8(EmitContext& ctx); -void EmitLoadGlobalS8(EmitContext& ctx); -void EmitLoadGlobalU16(EmitContext& ctx); -void EmitLoadGlobalS16(EmitContext& ctx); -Id EmitLoadGlobal32(EmitContext& ctx, Id address); -Id EmitLoadGlobal64(EmitContext& ctx, Id address); -Id EmitLoadGlobal128(EmitContext& ctx, Id address); -void 
EmitWriteGlobalU8(EmitContext& ctx); -void EmitWriteGlobalS8(EmitContext& ctx); -void EmitWriteGlobalU16(EmitContext& ctx); -void EmitWriteGlobalS16(EmitContext& ctx); -void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); -void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); -void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); -Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitLoadSharedU8(EmitContext& ctx, Id offset); -Id EmitLoadSharedS8(EmitContext& ctx, Id offset); -Id EmitLoadSharedU16(EmitContext& ctx, Id offset); -Id EmitLoadSharedS16(EmitContext& ctx, Id offset); -Id 
EmitLoadSharedU32(EmitContext& ctx, Id offset); -Id EmitLoadSharedU64(EmitContext& ctx, Id offset); -Id EmitLoadSharedU128(EmitContext& ctx, Id offset); -void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); -void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); -Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); -Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); -Id 
EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); -Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); -Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); -void EmitCompositeConstructF64x2(EmitContext& ctx); -void EmitCompositeConstructF64x3(EmitContext& ctx); -void EmitCompositeConstructF64x4(EmitContext& ctx); -void EmitCompositeExtractF64x2(EmitContext& ctx); -void EmitCompositeExtractF64x3(EmitContext& ctx); -void EmitCompositeExtractF64x4(EmitContext& ctx); -Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); -Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); -Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); -void EmitBitCastU16F16(EmitContext& ctx); -Id EmitBitCastU32F32(EmitContext& ctx, Id value); -void EmitBitCastU64F64(EmitContext& ctx); -void EmitBitCastF16U16(EmitContext& ctx); -Id EmitBitCastF32U32(EmitContext& ctx, Id value); -void EmitBitCastF64U64(EmitContext& 
ctx); -Id EmitPackUint2x32(EmitContext& ctx, Id value); -Id EmitUnpackUint2x32(EmitContext& ctx, Id value); -Id EmitPackFloat2x16(EmitContext& ctx, Id value); -Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); -Id EmitPackHalf2x16(EmitContext& ctx, Id value); -Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); -Id EmitPackDouble2x32(EmitContext& ctx, Id value); -Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); -void EmitGetZeroFromOp(EmitContext& ctx); -void EmitGetSignFromOp(EmitContext& ctx); -void EmitGetCarryFromOp(EmitContext& ctx); -void EmitGetOverflowFromOp(EmitContext& ctx); -void EmitGetSparseFromOp(EmitContext& ctx); -void EmitGetInBoundsFromOp(EmitContext& ctx); -Id EmitFPAbs16(EmitContext& ctx, Id value); -Id EmitFPAbs32(EmitContext& ctx, Id value); -Id EmitFPAbs64(EmitContext& ctx, Id value); -Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPMax32(EmitContext& ctx, Id a, Id b); -Id EmitFPMax64(EmitContext& ctx, Id a, Id b); -Id EmitFPMin32(EmitContext& ctx, Id a, Id b); -Id EmitFPMin64(EmitContext& ctx, Id a, Id b); -Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitFPNeg16(EmitContext& ctx, Id value); -Id EmitFPNeg32(EmitContext& ctx, Id value); -Id EmitFPNeg64(EmitContext& ctx, Id value); -Id EmitFPSin(EmitContext& ctx, Id value); -Id EmitFPCos(EmitContext& ctx, Id value); -Id EmitFPExp2(EmitContext& ctx, Id value); -Id EmitFPLog2(EmitContext& ctx, Id value); -Id EmitFPRecip32(EmitContext& ctx, Id value); -Id EmitFPRecip64(EmitContext& ctx, Id value); 
-Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); -Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); -Id EmitFPSqrt(EmitContext& ctx, Id value); -Id EmitFPSaturate16(EmitContext& ctx, Id value); -Id EmitFPSaturate32(EmitContext& ctx, Id value); -Id EmitFPSaturate64(EmitContext& ctx, Id value); -Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); -Id EmitFPRoundEven16(EmitContext& ctx, Id value); -Id EmitFPRoundEven32(EmitContext& ctx, Id value); -Id EmitFPRoundEven64(EmitContext& ctx, Id value); -Id EmitFPFloor16(EmitContext& ctx, Id value); -Id EmitFPFloor32(EmitContext& ctx, Id value); -Id EmitFPFloor64(EmitContext& ctx, Id value); -Id EmitFPCeil16(EmitContext& ctx, Id value); -Id EmitFPCeil32(EmitContext& ctx, Id value); -Id EmitFPCeil64(EmitContext& ctx, Id value); -Id EmitFPTrunc16(EmitContext& ctx, Id value); -Id EmitFPTrunc32(EmitContext& ctx, Id value); -Id EmitFPTrunc64(EmitContext& ctx, Id value); -Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id 
EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); -Id EmitFPIsNan16(EmitContext& ctx, Id value); -Id EmitFPIsNan32(EmitContext& ctx, Id value); -Id EmitFPIsNan64(EmitContext& ctx, Id value); -Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitIAdd64(EmitContext& ctx, Id a, Id b); -Id EmitISub32(EmitContext& ctx, Id a, Id b); -Id EmitISub64(EmitContext& ctx, Id a, Id b); -Id EmitIMul32(EmitContext& ctx, Id a, Id b); -Id EmitINeg32(EmitContext& ctx, Id value); -Id EmitINeg64(EmitContext& ctx, Id value); -Id EmitIAbs32(EmitContext& ctx, Id value); -Id EmitIAbs64(EmitContext& ctx, Id value); -Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); -Id 
EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); -Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); -Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); -Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); -Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); -Id EmitBitReverse32(EmitContext& ctx, Id value); -Id EmitBitCount32(EmitContext& ctx, Id value); -Id EmitBitwiseNot32(EmitContext& ctx, Id value); -Id EmitFindSMsb32(EmitContext& ctx, Id value); -Id EmitFindUMsb32(EmitContext& ctx, Id value); -Id EmitSMin32(EmitContext& ctx, Id a, Id b); -Id EmitUMin32(EmitContext& ctx, Id a, Id b); -Id EmitSMax32(EmitContext& ctx, Id a, Id b); -Id EmitUMax32(EmitContext& ctx, Id a, Id b); -Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); -Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id 
pointer_offset, Id value); -Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); -Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicExchange32(EmitContext& ctx, 
const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, - Id value); -Id EmitGlobalAtomicIAdd32(EmitContext& ctx); -Id EmitGlobalAtomicSMin32(EmitContext& ctx); -Id EmitGlobalAtomicUMin32(EmitContext& ctx); -Id 
EmitGlobalAtomicSMax32(EmitContext& ctx); -Id EmitGlobalAtomicUMax32(EmitContext& ctx); -Id EmitGlobalAtomicInc32(EmitContext& ctx); -Id EmitGlobalAtomicDec32(EmitContext& ctx); -Id EmitGlobalAtomicAnd32(EmitContext& ctx); -Id EmitGlobalAtomicOr32(EmitContext& ctx); -Id EmitGlobalAtomicXor32(EmitContext& ctx); -Id EmitGlobalAtomicExchange32(EmitContext& ctx); -Id EmitGlobalAtomicIAdd64(EmitContext& ctx); -Id EmitGlobalAtomicSMin64(EmitContext& ctx); -Id EmitGlobalAtomicUMin64(EmitContext& ctx); -Id EmitGlobalAtomicSMax64(EmitContext& ctx); -Id EmitGlobalAtomicUMax64(EmitContext& ctx); -Id EmitGlobalAtomicInc64(EmitContext& ctx); -Id EmitGlobalAtomicDec64(EmitContext& ctx); -Id EmitGlobalAtomicAnd64(EmitContext& ctx); -Id EmitGlobalAtomicOr64(EmitContext& ctx); -Id EmitGlobalAtomicXor64(EmitContext& ctx); -Id EmitGlobalAtomicExchange64(EmitContext& ctx); -Id EmitGlobalAtomicAddF32(EmitContext& ctx); -Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); -Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); -Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); -Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); -Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); -Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); -Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); -Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); -Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); -Id EmitLogicalNot(EmitContext& ctx, Id value); -Id EmitConvertS16F16(EmitContext& ctx, Id value); -Id EmitConvertS16F32(EmitContext& ctx, Id value); -Id EmitConvertS16F64(EmitContext& ctx, Id value); -Id EmitConvertS32F16(EmitContext& ctx, Id value); -Id EmitConvertS32F32(EmitContext& ctx, Id value); -Id EmitConvertS32F64(EmitContext& ctx, Id value); -Id EmitConvertS64F16(EmitContext& ctx, Id value); -Id EmitConvertS64F32(EmitContext& ctx, Id value); -Id EmitConvertS64F64(EmitContext& ctx, Id value); -Id EmitConvertU16F16(EmitContext& ctx, Id value); -Id EmitConvertU16F32(EmitContext& ctx, Id value); -Id EmitConvertU16F64(EmitContext& ctx, 
Id value); -Id EmitConvertU32F16(EmitContext& ctx, Id value); -Id EmitConvertU32F32(EmitContext& ctx, Id value); -Id EmitConvertU32F64(EmitContext& ctx, Id value); -Id EmitConvertU64F16(EmitContext& ctx, Id value); -Id EmitConvertU64F32(EmitContext& ctx, Id value); -Id EmitConvertU64F64(EmitContext& ctx, Id value); -Id EmitConvertU64U32(EmitContext& ctx, Id value); -Id EmitConvertU32U64(EmitContext& ctx, Id value); -Id EmitConvertF16F32(EmitContext& ctx, Id value); -Id EmitConvertF32F16(EmitContext& ctx, Id value); -Id EmitConvertF32F64(EmitContext& ctx, Id value); -Id EmitConvertF64F32(EmitContext& ctx, Id value); -Id EmitConvertF16S8(EmitContext& ctx, Id value); -Id EmitConvertF16S16(EmitContext& ctx, Id value); -Id EmitConvertF16S32(EmitContext& ctx, Id value); -Id EmitConvertF16S64(EmitContext& ctx, Id value); -Id EmitConvertF16U8(EmitContext& ctx, Id value); -Id EmitConvertF16U16(EmitContext& ctx, Id value); -Id EmitConvertF16U32(EmitContext& ctx, Id value); -Id EmitConvertF16U64(EmitContext& ctx, Id value); -Id EmitConvertF32S8(EmitContext& ctx, Id value); -Id EmitConvertF32S16(EmitContext& ctx, Id value); -Id EmitConvertF32S32(EmitContext& ctx, Id value); -Id EmitConvertF32S64(EmitContext& ctx, Id value); -Id EmitConvertF32U8(EmitContext& ctx, Id value); -Id EmitConvertF32U16(EmitContext& ctx, Id value); -Id EmitConvertF32U32(EmitContext& ctx, Id value); -Id EmitConvertF32U64(EmitContext& ctx, Id value); -Id EmitConvertF64S8(EmitContext& ctx, Id value); -Id EmitConvertF64S16(EmitContext& ctx, Id value); -Id EmitConvertF64S32(EmitContext& ctx, Id value); -Id EmitConvertF64S64(EmitContext& ctx, Id value); -Id EmitConvertF64U8(EmitContext& ctx, Id value); -Id EmitConvertF64U16(EmitContext& ctx, Id value); -Id EmitConvertF64U32(EmitContext& ctx, Id value); -Id EmitConvertF64U64(EmitContext& ctx, Id value); -Id EmitBindlessImageSampleImplicitLod(EmitContext&); -Id EmitBindlessImageSampleExplicitLod(EmitContext&); -Id 
EmitBindlessImageSampleDrefImplicitLod(EmitContext&); -Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); -Id EmitBindlessImageGather(EmitContext&); -Id EmitBindlessImageGatherDref(EmitContext&); -Id EmitBindlessImageFetch(EmitContext&); -Id EmitBindlessImageQueryDimensions(EmitContext&); -Id EmitBindlessImageQueryLod(EmitContext&); -Id EmitBindlessImageGradient(EmitContext&); -Id EmitBindlessImageRead(EmitContext&); -Id EmitBindlessImageWrite(EmitContext&); -Id EmitBoundImageSampleImplicitLod(EmitContext&); -Id EmitBoundImageSampleExplicitLod(EmitContext&); -Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); -Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); -Id EmitBoundImageGather(EmitContext&); -Id EmitBoundImageGatherDref(EmitContext&); -Id EmitBoundImageFetch(EmitContext&); -Id EmitBoundImageQueryDimensions(EmitContext&); -Id EmitBoundImageQueryLod(EmitContext&); -Id EmitBoundImageGradient(EmitContext&); -Id EmitBoundImageRead(EmitContext&); -Id EmitBoundImageWrite(EmitContext&); -Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id bias_lc, const IR::Value& offset); -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id lod_lc, const IR::Value& offset); -Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id bias_lc, const IR::Value& offset); -Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, - Id coords, Id dref, Id lod_lc, const IR::Value& offset); -Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - const IR::Value& offset, const IR::Value& offset2); -Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - const IR::Value& offset, const IR::Value& offset2, Id dref); -Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, - 
Id lod, Id ms); -Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); -Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id derivates, Id offset, Id lod_clamp); -Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); -void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); -Id EmitBindlessImageAtomicIAdd32(EmitContext&); -Id EmitBindlessImageAtomicSMin32(EmitContext&); -Id EmitBindlessImageAtomicUMin32(EmitContext&); -Id EmitBindlessImageAtomicSMax32(EmitContext&); -Id EmitBindlessImageAtomicUMax32(EmitContext&); -Id EmitBindlessImageAtomicInc32(EmitContext&); -Id EmitBindlessImageAtomicDec32(EmitContext&); -Id EmitBindlessImageAtomicAnd32(EmitContext&); -Id EmitBindlessImageAtomicOr32(EmitContext&); -Id EmitBindlessImageAtomicXor32(EmitContext&); -Id EmitBindlessImageAtomicExchange32(EmitContext&); -Id EmitBoundImageAtomicIAdd32(EmitContext&); -Id EmitBoundImageAtomicSMin32(EmitContext&); -Id EmitBoundImageAtomicUMin32(EmitContext&); -Id EmitBoundImageAtomicSMax32(EmitContext&); -Id EmitBoundImageAtomicUMax32(EmitContext&); -Id EmitBoundImageAtomicInc32(EmitContext&); -Id EmitBoundImageAtomicDec32(EmitContext&); -Id EmitBoundImageAtomicAnd32(EmitContext&); -Id EmitBoundImageAtomicOr32(EmitContext&); -Id EmitBoundImageAtomicXor32(EmitContext&); -Id EmitBoundImageAtomicExchange32(EmitContext&); -Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); 
-Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, - Id value); -Id EmitLaneId(EmitContext& ctx); -Id EmitVoteAll(EmitContext& ctx, Id pred); -Id EmitVoteAny(EmitContext& ctx, Id pred); -Id EmitVoteEqual(EmitContext& ctx, Id pred); -Id EmitSubgroupBallot(EmitContext& ctx, Id pred); -Id EmitSubgroupEqMask(EmitContext& ctx); -Id EmitSubgroupLtMask(EmitContext& ctx); -Id EmitSubgroupLeMask(EmitContext& ctx); -Id EmitSubgroupGtMask(EmitContext& ctx); -Id EmitSubgroupGeMask(EmitContext& ctx); -Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, - Id segmentation_mask); -Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); - -Id EmitDPdxFine(EmitContext& ctx, Id op_a); - -Id EmitDPdyFine(EmitContext& ctx, Id op_a); - -Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); - -Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); +[[nodiscard]] inline std::vector EmitSPIRV(const Profile& profile, IR::Program& program) { + Bindings binding; + 
return EmitSPIRV(profile, program, binding); +} } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 6e17d1c7e..053800eb7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp index 705aebd81..e0b52a001 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp index 93a45d834..bb11f4f4e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 079e226de..10ff4ecab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index ef32184ea..8e57ff070 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -6,6 +6,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp index b4a6fbb93..6154c46be 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index acb8957fe..fd74e475f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index b3afbef25..61cf25f9c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 6680cf1b3..5832104df 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp index 05bed22b9..d7f1a365a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h new file mode 100644 index 000000000..b5eec3cd1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -0,0 +1,583 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/common_types.h" + +namespace IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace IR + +namespace Shader::Backend::SPIRV { + +using Sirit::Id; + +class EmitContext; + +// Microinstruction emitters +Id EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +Id EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitBranch(EmitContext& ctx, Id label); +void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id false_label); +void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); +void EmitSelectionMerge(EmitContext& ctx, Id merge_label); +void EmitReturn(EmitContext& ctx); +void EmitJoin(EmitContext& ctx); +void EmitUnreachable(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void EmitGetIndirectBranchVariable(EmitContext& ctx); +Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const 
IR::Value& offset); +Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); +Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); +Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); +void EmitSetSampleMask(EmitContext& ctx, Id value); +void EmitSetFragDepth(EmitContext& ctx, Id value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +Id EmitWorkgroupId(EmitContext& ctx); +Id EmitLocalInvocationId(EmitContext& ctx); +Id EmitInvocationId(EmitContext& ctx); +Id EmitSampleId(EmitContext& ctx); +Id EmitIsHelperInvocation(EmitContext& ctx); +Id EmitYDirection(EmitContext& ctx); +Id EmitLoadLocal(EmitContext& ctx, Id word_offset); +void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); +Id EmitUndefU1(EmitContext& ctx); +Id EmitUndefU8(EmitContext& ctx); +Id EmitUndefU16(EmitContext& ctx); +Id EmitUndefU32(EmitContext& ctx); +Id EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +Id EmitLoadGlobal32(EmitContext& ctx, Id address); +Id EmitLoadGlobal64(EmitContext& ctx, Id address); +Id EmitLoadGlobal128(EmitContext& ctx, Id address); +void 
EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); +Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitLoadSharedU8(EmitContext& ctx, Id offset); +Id EmitLoadSharedS8(EmitContext& ctx, Id offset); +Id EmitLoadSharedU16(EmitContext& ctx, Id offset); +Id EmitLoadSharedS16(EmitContext& ctx, Id offset); +Id 
EmitLoadSharedU32(EmitContext& ctx, Id offset); +Id EmitLoadSharedU64(EmitContext& ctx, Id offset); +Id EmitLoadSharedU128(EmitContext& ctx, Id offset); +void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); +void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); +Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); +Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); +Id 
EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); +Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); +void EmitBitCastU16F16(EmitContext& ctx); +Id EmitBitCastU32F32(EmitContext& ctx, Id value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +Id EmitBitCastF32U32(EmitContext& ctx, Id value); +void EmitBitCastF64U64(EmitContext& 
ctx); +Id EmitPackUint2x32(EmitContext& ctx, Id value); +Id EmitUnpackUint2x32(EmitContext& ctx, Id value); +Id EmitPackFloat2x16(EmitContext& ctx, Id value); +Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); +Id EmitPackHalf2x16(EmitContext& ctx, Id value); +Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); +Id EmitPackDouble2x32(EmitContext& ctx, Id value); +Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +Id EmitFPAbs16(EmitContext& ctx, Id value); +Id EmitFPAbs32(EmitContext& ctx, Id value); +Id EmitFPAbs64(EmitContext& ctx, Id value); +Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax64(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin64(EmitContext& ctx, Id a, Id b); +Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitFPNeg16(EmitContext& ctx, Id value); +Id EmitFPNeg32(EmitContext& ctx, Id value); +Id EmitFPNeg64(EmitContext& ctx, Id value); +Id EmitFPSin(EmitContext& ctx, Id value); +Id EmitFPCos(EmitContext& ctx, Id value); +Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPLog2(EmitContext& ctx, Id value); +Id EmitFPRecip32(EmitContext& ctx, Id value); +Id EmitFPRecip64(EmitContext& ctx, Id value); 
+Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); +Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); +Id EmitFPSqrt(EmitContext& ctx, Id value); +Id EmitFPSaturate16(EmitContext& ctx, Id value); +Id EmitFPSaturate32(EmitContext& ctx, Id value); +Id EmitFPSaturate64(EmitContext& ctx, Id value); +Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); +Id EmitFPRoundEven16(EmitContext& ctx, Id value); +Id EmitFPRoundEven32(EmitContext& ctx, Id value); +Id EmitFPRoundEven64(EmitContext& ctx, Id value); +Id EmitFPFloor16(EmitContext& ctx, Id value); +Id EmitFPFloor32(EmitContext& ctx, Id value); +Id EmitFPFloor64(EmitContext& ctx, Id value); +Id EmitFPCeil16(EmitContext& ctx, Id value); +Id EmitFPCeil32(EmitContext& ctx, Id value); +Id EmitFPCeil64(EmitContext& ctx, Id value); +Id EmitFPTrunc16(EmitContext& ctx, Id value); +Id EmitFPTrunc32(EmitContext& ctx, Id value); +Id EmitFPTrunc64(EmitContext& ctx, Id value); +Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id 
EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitFPIsNan16(EmitContext& ctx, Id value); +Id EmitFPIsNan32(EmitContext& ctx, Id value); +Id EmitFPIsNan64(EmitContext& ctx, Id value); +Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitIAdd64(EmitContext& ctx, Id a, Id b); +Id EmitISub32(EmitContext& ctx, Id a, Id b); +Id EmitISub64(EmitContext& ctx, Id a, Id b); +Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitINeg32(EmitContext& ctx, Id value); +Id EmitINeg64(EmitContext& ctx, Id value); +Id EmitIAbs32(EmitContext& ctx, Id value); +Id EmitIAbs64(EmitContext& ctx, Id value); +Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); +Id 
EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); +Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); +Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); +Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); +Id EmitBitReverse32(EmitContext& ctx, Id value); +Id EmitBitCount32(EmitContext& ctx, Id value); +Id EmitBitwiseNot32(EmitContext& ctx, Id value); +Id EmitFindSMsb32(EmitContext& ctx, Id value); +Id EmitFindUMsb32(EmitContext& ctx, Id value); +Id EmitSMin32(EmitContext& ctx, Id a, Id b); +Id EmitUMin32(EmitContext& ctx, Id a, Id b); +Id EmitSMax32(EmitContext& ctx, Id a, Id b); +Id EmitUMax32(EmitContext& ctx, Id a, Id b); +Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); +Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id 
pointer_offset, Id value); +Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange32(EmitContext& ctx, 
const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitGlobalAtomicIAdd32(EmitContext& ctx); +Id EmitGlobalAtomicSMin32(EmitContext& ctx); +Id EmitGlobalAtomicUMin32(EmitContext& ctx); +Id 
EmitGlobalAtomicSMax32(EmitContext& ctx); +Id EmitGlobalAtomicUMax32(EmitContext& ctx); +Id EmitGlobalAtomicInc32(EmitContext& ctx); +Id EmitGlobalAtomicDec32(EmitContext& ctx); +Id EmitGlobalAtomicAnd32(EmitContext& ctx); +Id EmitGlobalAtomicOr32(EmitContext& ctx); +Id EmitGlobalAtomicXor32(EmitContext& ctx); +Id EmitGlobalAtomicExchange32(EmitContext& ctx); +Id EmitGlobalAtomicIAdd64(EmitContext& ctx); +Id EmitGlobalAtomicSMin64(EmitContext& ctx); +Id EmitGlobalAtomicUMin64(EmitContext& ctx); +Id EmitGlobalAtomicSMax64(EmitContext& ctx); +Id EmitGlobalAtomicUMax64(EmitContext& ctx); +Id EmitGlobalAtomicInc64(EmitContext& ctx); +Id EmitGlobalAtomicDec64(EmitContext& ctx); +Id EmitGlobalAtomicAnd64(EmitContext& ctx); +Id EmitGlobalAtomicOr64(EmitContext& ctx); +Id EmitGlobalAtomicXor64(EmitContext& ctx); +Id EmitGlobalAtomicExchange64(EmitContext& ctx); +Id EmitGlobalAtomicAddF32(EmitContext& ctx); +Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); +Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); +Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); +Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); +Id EmitLogicalNot(EmitContext& ctx, Id value); +Id EmitConvertS16F16(EmitContext& ctx, Id value); +Id EmitConvertS16F32(EmitContext& ctx, Id value); +Id EmitConvertS16F64(EmitContext& ctx, Id value); +Id EmitConvertS32F16(EmitContext& ctx, Id value); +Id EmitConvertS32F32(EmitContext& ctx, Id value); +Id EmitConvertS32F64(EmitContext& ctx, Id value); +Id EmitConvertS64F16(EmitContext& ctx, Id value); +Id EmitConvertS64F32(EmitContext& ctx, Id value); +Id EmitConvertS64F64(EmitContext& ctx, Id value); +Id EmitConvertU16F16(EmitContext& ctx, Id value); +Id EmitConvertU16F32(EmitContext& ctx, Id value); +Id EmitConvertU16F64(EmitContext& ctx, 
Id value); +Id EmitConvertU32F16(EmitContext& ctx, Id value); +Id EmitConvertU32F32(EmitContext& ctx, Id value); +Id EmitConvertU32F64(EmitContext& ctx, Id value); +Id EmitConvertU64F16(EmitContext& ctx, Id value); +Id EmitConvertU64F32(EmitContext& ctx, Id value); +Id EmitConvertU64F64(EmitContext& ctx, Id value); +Id EmitConvertU64U32(EmitContext& ctx, Id value); +Id EmitConvertU32U64(EmitContext& ctx, Id value); +Id EmitConvertF16F32(EmitContext& ctx, Id value); +Id EmitConvertF32F16(EmitContext& ctx, Id value); +Id EmitConvertF32F64(EmitContext& ctx, Id value); +Id EmitConvertF64F32(EmitContext& ctx, Id value); +Id EmitConvertF16S8(EmitContext& ctx, Id value); +Id EmitConvertF16S16(EmitContext& ctx, Id value); +Id EmitConvertF16S32(EmitContext& ctx, Id value); +Id EmitConvertF16S64(EmitContext& ctx, Id value); +Id EmitConvertF16U8(EmitContext& ctx, Id value); +Id EmitConvertF16U16(EmitContext& ctx, Id value); +Id EmitConvertF16U32(EmitContext& ctx, Id value); +Id EmitConvertF16U64(EmitContext& ctx, Id value); +Id EmitConvertF32S8(EmitContext& ctx, Id value); +Id EmitConvertF32S16(EmitContext& ctx, Id value); +Id EmitConvertF32S32(EmitContext& ctx, Id value); +Id EmitConvertF32S64(EmitContext& ctx, Id value); +Id EmitConvertF32U8(EmitContext& ctx, Id value); +Id EmitConvertF32U16(EmitContext& ctx, Id value); +Id EmitConvertF32U32(EmitContext& ctx, Id value); +Id EmitConvertF32U64(EmitContext& ctx, Id value); +Id EmitConvertF64S8(EmitContext& ctx, Id value); +Id EmitConvertF64S16(EmitContext& ctx, Id value); +Id EmitConvertF64S32(EmitContext& ctx, Id value); +Id EmitConvertF64S64(EmitContext& ctx, Id value); +Id EmitConvertF64U8(EmitContext& ctx, Id value); +Id EmitConvertF64U16(EmitContext& ctx, Id value); +Id EmitConvertF64U32(EmitContext& ctx, Id value); +Id EmitConvertF64U64(EmitContext& ctx, Id value); +Id EmitBindlessImageSampleImplicitLod(EmitContext&); +Id EmitBindlessImageSampleExplicitLod(EmitContext&); +Id 
EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +Id EmitBindlessImageGather(EmitContext&); +Id EmitBindlessImageGatherDref(EmitContext&); +Id EmitBindlessImageFetch(EmitContext&); +Id EmitBindlessImageQueryDimensions(EmitContext&); +Id EmitBindlessImageQueryLod(EmitContext&); +Id EmitBindlessImageGradient(EmitContext&); +Id EmitBindlessImageRead(EmitContext&); +Id EmitBindlessImageWrite(EmitContext&); +Id EmitBoundImageSampleImplicitLod(EmitContext&); +Id EmitBoundImageSampleExplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); +Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); +Id EmitBoundImageGather(EmitContext&); +Id EmitBoundImageGatherDref(EmitContext&); +Id EmitBoundImageFetch(EmitContext&); +Id EmitBoundImageQueryDimensions(EmitContext&); +Id EmitBoundImageQueryLod(EmitContext&); +Id EmitBoundImageGradient(EmitContext&); +Id EmitBoundImageRead(EmitContext&); +Id EmitBoundImageWrite(EmitContext&); +Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id bias_lc, const IR::Value& offset); +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id lod_lc, const IR::Value& offset); +Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id bias_lc, const IR::Value& offset); +Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + Id coords, Id dref, Id lod_lc, const IR::Value& offset); +Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2); +Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + const IR::Value& offset, const IR::Value& offset2, Id dref); +Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, + 
Id lod, Id ms); +Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); +Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id derivates, Id offset, Id lod_clamp); +Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); +Id EmitBindlessImageAtomicIAdd32(EmitContext&); +Id EmitBindlessImageAtomicSMin32(EmitContext&); +Id EmitBindlessImageAtomicUMin32(EmitContext&); +Id EmitBindlessImageAtomicSMax32(EmitContext&); +Id EmitBindlessImageAtomicUMax32(EmitContext&); +Id EmitBindlessImageAtomicInc32(EmitContext&); +Id EmitBindlessImageAtomicDec32(EmitContext&); +Id EmitBindlessImageAtomicAnd32(EmitContext&); +Id EmitBindlessImageAtomicOr32(EmitContext&); +Id EmitBindlessImageAtomicXor32(EmitContext&); +Id EmitBindlessImageAtomicExchange32(EmitContext&); +Id EmitBoundImageAtomicIAdd32(EmitContext&); +Id EmitBoundImageAtomicSMin32(EmitContext&); +Id EmitBoundImageAtomicUMin32(EmitContext&); +Id EmitBoundImageAtomicSMax32(EmitContext&); +Id EmitBoundImageAtomicUMax32(EmitContext&); +Id EmitBoundImageAtomicInc32(EmitContext&); +Id EmitBoundImageAtomicDec32(EmitContext&); +Id EmitBoundImageAtomicAnd32(EmitContext&); +Id EmitBoundImageAtomicOr32(EmitContext&); +Id EmitBoundImageAtomicXor32(EmitContext&); +Id EmitBoundImageAtomicExchange32(EmitContext&); +Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); 
+Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, + Id value); +Id EmitLaneId(EmitContext& ctx); +Id EmitVoteAll(EmitContext& ctx, Id pred); +Id EmitVoteAny(EmitContext& ctx, Id pred); +Id EmitVoteEqual(EmitContext& ctx, Id pred); +Id EmitSubgroupBallot(EmitContext& ctx, Id pred); +Id EmitSubgroupEqMask(EmitContext& ctx); +Id EmitSubgroupLtMask(EmitContext& ctx); +Id EmitSubgroupLeMask(EmitContext& ctx); +Id EmitSubgroupGtMask(EmitContext& ctx); +Id EmitSubgroupGeMask(EmitContext& ctx); +Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, + Id segmentation_mask); +Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); +Id EmitDPdxFine(EmitContext& ctx, Id op_a); +Id EmitDPdyFine(EmitContext& ctx, Id op_a); +Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); +Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); + +} // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp 
b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 86e6a4f3b..06ab23b1d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp index bb434def2..b9a9500fc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index a6a3f3351..37a66095f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -5,6 +5,7 @@ #include #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp index 0b45db45e..c5b4f4720 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index 710d1cd25..9a79fc7a2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index d5430e905..ba948f3c9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp index 19b06dbe4..c9f469e90 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 239e2ecab..78b1e1ba7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" namespace Shader::Backend::SPIRV { namespace { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c9ca1f005..6585817bc 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -254,7 +254,7 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( OGLProgram gl_program; gl_program.handle = glCreateProgram(); - Shader::Backend::SPIRV::Bindings binding; + Shader::Backend::Bindings binding; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -297,8 +297,7 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - Shader::Backend::SPIRV::Bindings binding; - const std::vector code{EmitSPIRV(profile, program, binding)}; + const std::vector code{EmitSPIRV(profile, program)}; OGLProgram gl_program; gl_program.handle = glCreateProgram(); AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30b71bdbc..a5edcd072 100644 --- 
a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,8 +315,9 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; - Shader::Backend::SPIRV::Bindings binding; - for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; ++index) { + Shader::Backend::Bindings binding; + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -388,8 +389,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - Shader::Backend::SPIRV::Bindings binding; - const std::vector code{EmitSPIRV(base_profile, program, binding)}; + const std::vector code{EmitSPIRV(base_profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { -- cgit v1.2.3 From bfa47539f6d5779a80d6fb23ae49c1d34e01ae93 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 5 May 2021 01:08:16 -0300 Subject: gl_shader_cache: Remove code unintentionally committed --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6585817bc..9bbdfeb62 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -266,9 +266,6 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( infos[stage_index] = &program.info; const std::vector code{EmitSPIRV(profile, program, binding)}; - FILE* file = fopen("D:\\shader.spv", "wb"); - fwrite(code.data(), 4, code.size(), file); - fclose(file); 
AddShader(Stage(stage_index), gl_program.handle, code); } LinkProgram(gl_program.handle); -- cgit v1.2.3 From 2c81ad831192a8234e26a61706f18b460999c89f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 8 May 2021 16:34:41 -0300 Subject: glasm: Initial GLASM compute implementation for testing --- .../renderer_opengl/gl_compute_program.cpp | 17 +++++++--- .../renderer_opengl/gl_compute_program.h | 7 ++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 37 ++++++++++++++++++---- 3 files changed, 47 insertions(+), 14 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp index d5ef65439..fb54618a4 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -29,11 +29,11 @@ bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, OGLProgram program_, - const Shader::Info& info_) + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - kepler_compute{kepler_compute_}, - program_manager{program_manager_}, program{std::move(program_)}, info{info_} { + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, + source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { for (const auto& desc : info.texture_buffer_descriptors) { num_texture_buffers += desc.count; } @@ -124,6 +124,14 @@ void ComputeProgram::Configure() { const std::span indices_span(image_view_indices.data(), image_view_indices.size()); 
texture_cache.FillComputeImageViews(indices_span, image_view_ids); + if (assembly_program.handle != 0) { + // FIXME: State track this + glEnable(GL_COMPUTE_PROGRAM_NV); + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, assembly_program.handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(source_program.handle); + } buffer_cache.UnbindComputeTextureBuffers(); size_t texbuf_index{}; const auto add_buffer{[&](const auto& desc) { @@ -172,7 +180,6 @@ void ComputeProgram::Configure() { if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); } - program_manager.BindProgram(program.handle); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h index 64a75d44d..ddb00dc1d 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.h +++ b/src/video_core/renderer_opengl/gl_compute_program.h @@ -52,8 +52,8 @@ public: explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, OGLProgram program_, - const Shader::Info& info_); + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_); void Configure(); @@ -64,8 +64,9 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; ProgramManager& program_manager; - OGLProgram program; Shader::Info info; + OGLProgram source_program; + OGLAssemblyProgram assembly_program; u32 num_texture_buffers{}; u32 num_image_buffers{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9bbdfeb62..d9f0bca78 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -16,6 +16,7 @@ #include "common/scope_exit.h" #include "core/core.h" #include 
"core/frontend/emu_window.h" +#include "shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -89,6 +90,7 @@ const Shader::Profile profile{ .xfb_varyings = {}, }; +using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; @@ -151,6 +153,22 @@ void LinkProgram(GLuint program) { } } +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { + OGLAssemblyProgram program; + glGenProgramsARB(1, &program.handle); + glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, + static_cast(code.size()), code.data()); + if (!Settings::values.renderer_debug) { + return program; + } + const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + LOG_CRITICAL(Render_OpenGL, "{}", err); + LOG_INFO(Render_OpenGL, "{}", code); + } + return program; +} + GLenum Stage(size_t stage_index) { switch (stage_index) { case 0: @@ -294,13 +312,20 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - const std::vector code{EmitSPIRV(profile, program)}; - OGLProgram gl_program; - gl_program.handle = glCreateProgram(); - AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); - LinkProgram(gl_program.handle); + OGLAssemblyProgram asm_program; + OGLProgram source_program; + if (device.UseAssemblyShaders()) { + const std::string code{EmitGLASM(profile, program)}; + asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + } else { + const std::vector code{EmitSPIRV(profile, program)}; + source_program.handle = glCreateProgram(); + AddShader(GL_COMPUTE_SHADER, source_program.handle, code); + 
LinkProgram(source_program.handle); + } return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, - program_manager, std::move(gl_program), program.info); + program_manager, program.info, + std::move(source_program), std::move(asm_program)); } } // namespace OpenGL -- cgit v1.2.3 From dc02cb92e43d2ef05197e4edb2573116d7ae58c1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 8 May 2021 16:36:51 -0300 Subject: gl_rasterizer: Flush L2 caches before glFlush on GLASM --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e527b76ba..4834d58f0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -473,6 +473,14 @@ void RasterizerOpenGL::FlushCommands() { return; } num_queued_commands = 0; + + // Make sure memory stored from the previous GL command stream is visible + // This is only needed on assembly shaders where we write to GPU memory with raw pointers + // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used + // and prefer using NV_shader_storage_buffer_object when possible + if (Settings::values.use_assembly_shaders.GetValue()) { + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + } glFlush(); } -- cgit v1.2.3 From 258f2dec1bc6f1f9d966579c1efb96f76d947060 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 15 May 2021 18:19:08 -0300 Subject: opengl: Initial (broken) support to GLASM shaders --- .../renderer_opengl/gl_graphics_program.cpp | 15 ++++++- .../renderer_opengl/gl_graphics_program.h | 6 ++- src/video_core/renderer_opengl/gl_shader_cache.cpp | 46 ++++++++++++++++------ 3 files changed, 53 insertions(+), 14 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git 
a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index fd0958719..7c0bf7bc8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -33,10 +33,12 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, + std::array assembly_programs_, const std::array& infos) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, - state_tracker{state_tracker_}, program{std::move(program_)} { + state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( + assembly_programs_)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); @@ -290,7 +292,16 @@ void GraphicsProgram::Configure(bool is_indexed) { texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - program_manager.BindProgram(program.handle); + if (assembly_programs[0].handle != 0) { + // TODO: State track this + glEnable(GL_VERTEX_PROGRAM_NV); + glEnable(GL_FRAGMENT_PROGRAM_NV); + glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); + glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(program.handle); + } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 5adf3f41e..58aa4b0bc 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -73,7 +73,9 @@ public: Tegra::MemoryManager& gpu_memory_, 
Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, const std::array& infos); + OGLProgram program_, + std::array assembly_programs_, + const std::array& infos); void Configure(bool is_indexed); @@ -86,6 +88,8 @@ private: StateTracker& state_tracker; OGLProgram program; + std::array assembly_programs; + std::array stage_infos{}; std::array base_uniform_bindings{}; std::array base_storage_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d9f0bca78..c10ea2f60 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -185,6 +185,23 @@ GLenum Stage(size_t stage_index) { UNREACHABLE_MSG("{}", stage_index); return GL_NONE; } + +GLenum AssemblyStage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_PROGRAM_NV; + case 1: + return GL_TESS_CONTROL_PROGRAM_NV; + case 2: + return GL_TESS_EVALUATION_PROGRAM_NV; + case 3: + return GL_GEOMETRY_PROGRAM_NV; + case 4: + return GL_FRAGMENT_PROGRAM_NV; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -269,10 +286,12 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( } std::array infos{}; - OGLProgram gl_program; - gl_program.handle = glCreateProgram(); - + OGLProgram source_program; + std::array assembly_programs; Shader::Backend::Bindings binding; + if (!device.UseAssemblyShaders()) { + source_program.handle = glCreateProgram(); + } for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -282,15 +301,20 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( Shader::IR::Program& program{programs[index]}; const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - - const 
std::vector code{EmitSPIRV(profile, program, binding)}; - AddShader(Stage(stage_index), gl_program.handle, code); + if (device.UseAssemblyShaders()) { + const std::string code{EmitGLASM(profile, program)}; + assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); + } else { + const std::vector code{EmitSPIRV(profile, program, binding)}; + AddShader(Stage(stage_index), source_program.handle, code); + } } - LinkProgram(gl_program.handle); - - return std::make_unique(texture_cache, buffer_cache, gpu_memory, maxwell3d, - program_manager, state_tracker, std::move(gl_program), - infos); + if (!device.UseAssemblyShaders()) { + LinkProgram(source_program.handle); + } + return std::make_unique( + texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + std::move(source_program), std::move(assembly_programs), infos); } std::unique_ptr ShaderCache::CreateComputeProgram( -- cgit v1.2.3 From 8b7d5912d61d56f65fb7e3a03bba544a4c40bfa6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:04:09 -0300 Subject: glasm: Support textures used in more than one stage --- src/shader_recompiler/backend/glasm/emit_context.cpp | 10 +++++++++- src/shader_recompiler/backend/glasm/emit_context.h | 14 +++++++++++++- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 4 ++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 4 files changed, 25 insertions(+), 5 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 4903e9d8e..d1fe84a5f 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -4,6 +4,7 @@ #include +#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" @@ -22,7 +23,7 @@ std::string_view 
InterpDecorator(Interpolation interp) { } } // Anonymous namespace -EmitContext::EmitContext(IR::Program& program) { +EmitContext::EmitContext(IR::Program& program, Bindings& bindings) : info{program.info} { // FIXME: Temporary partial implementation u32 cbuf_index{}; for (const auto& desc : program.info.constant_buffer_descriptors) { @@ -79,6 +80,13 @@ EmitContext::EmitContext(IR::Program& program) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } + const size_t num_textures{program.info.texture_descriptors.size()}; + texture_bindings.resize(num_textures); + for (size_t index = 0; index < num_textures; ++index) { + const auto& desc{program.info.texture_descriptors[index]}; + texture_bindings[index] = bindings.texture; + bindings.texture += desc.count; + } } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index 4efe42ada..084635c77 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -6,11 +6,20 @@ #include #include +#include #include #include "shader_recompiler/backend/glasm/reg_alloc.h" +namespace Shader { +struct Info; +} + +namespace Shader::Backend { +struct Bindings; +} + namespace Shader::IR { class Inst; struct Program; @@ -20,7 +29,7 @@ namespace Shader::Backend::GLASM { class EmitContext { public: - explicit EmitContext(IR::Program& program); + explicit EmitContext(IR::Program& program, Bindings& bindings); template void Add(const char* format_str, IR::Inst& inst, Args&&... 
args) { @@ -45,6 +54,9 @@ public: std::string code; RegAlloc reg_alloc{*this}; + const Info& info; + + std::vector texture_bindings; std::string_view stage_name = "invalid"; }; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index a893fa3fb..edf6f5e13 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -312,8 +312,8 @@ std::string_view StageHeader(Stage stage) { } } // Anonymous namespace -std::string EmitGLASM(const Profile&, IR::Program& program, Bindings&) { - EmitContext ctx{program}; +std::string EmitGLASM(const Profile&, IR::Program& program, Bindings& bindings) { + EmitContext ctx{program, bindings}; Precolor(ctx, program); EmitCode(ctx, program); std::string header{StageHeader(program.stage)}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c10ea2f60..b84b36b9d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -302,7 +302,7 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program)}; + const std::string code{EmitGLASM(profile, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const std::vector code{EmitSPIRV(profile, program, binding)}; -- cgit v1.2.3 From 85fc7e584ef9d64bae3269e7993bbf919bd10640 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 18 May 2021 21:07:18 -0300 Subject: HACK: Bind stages before and after bindings Works around a bug where program parameters are only applied to the current stage, and this one wasn't bound at the moment. Affects all SSBO usages on GLASM. 
--- src/video_core/renderer_opengl/gl_graphics_program.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 7c0bf7bc8..4ac026502 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -240,6 +240,17 @@ void GraphicsProgram::Configure(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); + // FIXME: Unhack this + if (assembly_programs[0].handle != 0) { + // TODO: State track this + glEnable(GL_VERTEX_PROGRAM_NV); + glEnable(GL_FRAGMENT_PROGRAM_NV); + glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); + glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); + program_manager.BindProgram(0); + } else { + program_manager.BindProgram(program.handle); + } const ImageId* views_it{image_view_ids.data()}; GLsizei texture_binding = 0; GLsizei image_binding = 0; -- cgit v1.2.3 From c5ca4fe451c398542f4f6c5e468e0bb96866175d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 16:53:51 -0300 Subject: renderer_opengl: State track assembly programs --- .../renderer_opengl/gl_graphics_program.cpp | 27 +++--------- .../renderer_opengl/gl_graphics_program.h | 1 + src/video_core/renderer_opengl/gl_shader_manager.h | 51 ++++++++++++++++++++-- 3 files changed, 56 insertions(+), 23 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 4ac026502..b5d75aa13 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -42,6 +42,9 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff 
std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + for (size_t stage = 0; stage < 5; ++stage) { + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + } u32 num_textures{}; u32 num_images{}; for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { @@ -182,6 +185,9 @@ void GraphicsProgram::Configure(bool is_indexed) { const std::span indices_span(image_view_indices.data(), image_view_index); texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + texture_cache.UpdateRenderTargets(false); + state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + ImageId* texture_buffer_index{image_view_ids.data()}; const auto bind_stage_info{[&](size_t stage) { size_t index{}; @@ -240,14 +246,8 @@ void GraphicsProgram::Configure(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); - // FIXME: Unhack this if (assembly_programs[0].handle != 0) { - // TODO: State track this - glEnable(GL_VERTEX_PROGRAM_NV); - glEnable(GL_FRAGMENT_PROGRAM_NV); - glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); - glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, assembly_programs[4].handle); - program_manager.BindProgram(0); + program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { program_manager.BindProgram(program.handle); } @@ -300,19 +300,6 @@ void GraphicsProgram::Configure(bool is_indexed) { if (image_binding != 0) { glBindImageTextures(0, image_binding, images.data()); } - texture_cache.UpdateRenderTargets(false); - - state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - if (assembly_programs[0].handle != 0) { - // TODO: State track this - glEnable(GL_VERTEX_PROGRAM_NV); - glEnable(GL_FRAGMENT_PROGRAM_NV); - glBindProgramARB(GL_VERTEX_PROGRAM_NV, assembly_programs[0].handle); - glBindProgramARB(GL_FRAGMENT_PROGRAM_NV, 
assembly_programs[4].handle); - program_manager.BindProgram(0); - } else { - program_manager.BindProgram(program.handle); - } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 58aa4b0bc..18292bb16 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -89,6 +89,7 @@ private: OGLProgram program; std::array assembly_programs; + u32 enabled_stages_mask{}; std::array stage_infos{}; std::array base_uniform_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 70781d6f5..48669b3cd 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,24 +4,69 @@ #pragma once +#include +#include + #include +#include "video_core/renderer_opengl/gl_resource_manager.h" + +#pragma optimize("", off) + namespace OpenGL { class ProgramManager { + static constexpr size_t NUM_STAGES = 5; + + static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ + GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, + GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, + }; + public: void BindProgram(GLuint program) { - if (bound_program == program) { + if (current_source_program == program) { return; } - bound_program = program; + current_source_program = program; glUseProgram(program); } + void BindAssemblyPrograms(std::span programs, + u32 stage_mask) { + const u32 changed_mask = current_assembly_mask ^ stage_mask; + current_assembly_mask = stage_mask; + + if (changed_mask != 0) { + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (((changed_mask >> stage) & 1) != 0) { + if (((stage_mask >> stage) & 1) != 0) { + glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } else { + glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]); + } + } + } + } + for (size_t stage = 0; stage < 
NUM_STAGES; ++stage) { + if (current_assembly_programs[stage] != programs[stage].handle) { + current_assembly_programs[stage] = programs[stage].handle; + glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); + } + } + if (current_source_program != 0) { + current_source_program = 0; + glUseProgram(0); + } + } + void RestoreGuestCompute() {} private: - GLuint bound_program = 0; + GLuint current_source_program = 0; + + u32 current_assembly_mask = 0; + std::array current_assembly_programs; }; } // namespace OpenGL -- cgit v1.2.3 From 690b1841e6a1437335c0aae6d934f3fdcdb1680c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 16:59:05 -0300 Subject: renderer_opengl: State track compute assembly programs --- src/video_core/renderer_opengl/gl_compute_program.cpp | 5 +---- src/video_core/renderer_opengl/gl_shader_manager.h | 19 +++++++++++++++++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 1 + 3 files changed, 21 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp index fb54618a4..ce52a0052 100644 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp @@ -125,10 +125,7 @@ void ComputeProgram::Configure() { texture_cache.FillComputeImageViews(indices_span, image_view_ids); if (assembly_program.handle != 0) { - // FIXME: State track this - glEnable(GL_COMPUTE_PROGRAM_NV); - glBindProgramARB(GL_COMPUTE_PROGRAM_NV, assembly_program.handle); - program_manager.BindProgram(0); + program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { program_manager.BindProgram(source_program.handle); } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 48669b3cd..df7e1f644 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ 
b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -10,6 +10,7 @@ #include #include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_device.h" #pragma optimize("", off) @@ -24,6 +25,12 @@ class ProgramManager { }; public: + explicit ProgramManager(const Device& device) { + if (device.UseAssemblyShaders()) { + glEnable(GL_COMPUTE_PROGRAM_NV); + } + } + void BindProgram(GLuint program) { if (current_source_program == program) { return; @@ -32,6 +39,17 @@ public: glUseProgram(program); } + void BindComputeAssemblyProgram(GLuint program) { + if (current_compute_assembly_program != program) { + current_compute_assembly_program = program; + glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); + } + if (current_source_program != 0) { + current_source_program = 0; + glUseProgram(0); + } + } + void BindAssemblyPrograms(std::span programs, u32 stage_mask) { const u32 changed_mask = current_assembly_mask ^ stage_mask; @@ -67,6 +85,7 @@ private: u32 current_assembly_mask = 0; std::array current_assembly_programs; + GLuint current_compute_assembly_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 4e77ef808..a4805f3da 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -130,6 +130,7 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, std::unique_ptr context_) : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, + program_manager{device}, rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); -- cgit v1.2.3 From c0e4074721825e2af7be4f1a70408f5edb06597d Mon Sep 17 00:00:00 2001 From: 
ReinUsesLisp Date: Thu, 20 May 2021 21:00:55 -0300 Subject: gl_shader_manager: Remove unintentionally committed #pragma --- src/video_core/renderer_opengl/gl_shader_manager.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index df7e1f644..c922bcf82 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -12,8 +12,6 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_device.h" -#pragma optimize("", off) - namespace OpenGL { class ProgramManager { -- cgit v1.2.3 From 54decced922aaa73f4c30d696679f3602c930204 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 May 2021 21:01:41 -0300 Subject: gl_shader_manager: Zero initialize current assembly programs --- src/video_core/renderer_opengl/gl_shader_manager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index c922bcf82..5ec57d707 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -82,7 +82,7 @@ private: GLuint current_source_program = 0; u32 current_assembly_mask = 0; - std::array current_assembly_programs; + std::array current_assembly_programs{}; GLuint current_compute_assembly_program = 0; }; -- cgit v1.2.3 From 9e7b6622c25aa858b96bf0f1c7f94223a2f449a2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 02:12:32 -0300 Subject: shader: Split profile and runtime information in separate structs --- .../backend/glasm/emit_context.cpp | 40 +- src/shader_recompiler/backend/glasm/emit_context.h | 6 +- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 19 +- 
src/shader_recompiler/backend/glasm/emit_glasm.h | 6 +- .../backend/spirv/emit_context.cpp | 26 +- src/shader_recompiler/backend/spirv/emit_context.h | 4 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 20 +- src/shader_recompiler/backend/spirv/emit_spirv.h | 6 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 4 +- .../backend/spirv/emit_spirv_special.cpp | 15 +- src/shader_recompiler/profile.h | 13 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 26 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 418 ++++++++++----------- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 5 +- 14 files changed, 300 insertions(+), 308 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index e42f186c1..659ff6d17 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -23,23 +23,25 @@ std::string_view InterpDecorator(Interpolation interp) { } } // Anonymous namespace -EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_) - : info{program.info}, profile{profile_} { +EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_) + : profile{profile_}, runtime_info{runtime_info_} { // FIXME: Temporary partial implementation + const auto& info{program.info}; u32 cbuf_index{}; - for (const auto& desc : program.info.constant_buffer_descriptors) { + for (const auto& desc : info.constant_buffer_descriptors) { if (desc.count != 1) { throw NotImplementedException("Constant buffer descriptor array"); } Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); ++cbuf_index; } - for (const auto& desc : program.info.storage_buffers_descriptors) { + for (const auto& desc : info.storage_buffers_descriptors) { if (desc.count != 1) { throw NotImplementedException("Storage 
buffer descriptor array"); } } - if (const size_t num = program.info.storage_buffers_descriptors.size(); num > 0) { + if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); } stage = program.stage; @@ -67,8 +69,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { - const auto& generic{program.info.input_generics[index]}; + for (size_t index = 0; index < info.input_generics.size(); ++index) { + const auto& generic{info.input_generics[index]}; if (generic.used) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", InterpDecorator(generic.interpolation), index, attr_stage, index, index); @@ -101,8 +103,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile index, index); } } - for (size_t index = 0; index < program.info.stores_frag_color.size(); ++index) { - if (!program.info.stores_frag_color[index]) { + for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { + if (!info.stores_frag_color[index]) { continue; } if (index == 0) { @@ -111,28 +113,28 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } - for (size_t index = 0; index < program.info.stores_generics.size(); ++index) { - if (program.info.stores_generics[index]) { + for (size_t index = 0; index < info.stores_generics.size(); ++index) { + if (info.stores_generics[index]) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } - image_buffer_bindings.reserve(program.info.image_buffer_descriptors.size()); - for (const auto& desc : program.info.image_buffer_descriptors) { + image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); + for (const auto& desc : 
info.image_buffer_descriptors) { image_buffer_bindings.push_back(bindings.image); bindings.image += desc.count; } - image_bindings.reserve(program.info.image_descriptors.size()); - for (const auto& desc : program.info.image_descriptors) { + image_bindings.reserve(info.image_descriptors.size()); + for (const auto& desc : info.image_descriptors) { image_bindings.push_back(bindings.image); bindings.image += desc.count; } - texture_buffer_bindings.reserve(program.info.texture_buffer_descriptors.size()); - for (const auto& desc : program.info.texture_buffer_descriptors) { + texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); + for (const auto& desc : info.texture_buffer_descriptors) { texture_buffer_bindings.push_back(bindings.texture); bindings.texture += desc.count; } - texture_bindings.reserve(program.info.texture_descriptors.size()); - for (const auto& desc : program.info.texture_descriptors) { + texture_bindings.reserve(info.texture_descriptors.size()); + for (const auto& desc : info.texture_descriptors) { texture_bindings.push_back(bindings.texture); bindings.texture += desc.count; } diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h index e76ed1d7c..1f057fdd5 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.h +++ b/src/shader_recompiler/backend/glasm/emit_context.h @@ -16,6 +16,7 @@ namespace Shader { struct Info; struct Profile; +struct RuntimeInfo; } // namespace Shader namespace Shader::Backend { @@ -31,7 +32,8 @@ namespace Shader::Backend::GLASM { class EmitContext { public: - explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_); + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, + const RuntimeInfo& runtime_info_); template void Add(const char* format_str, IR::Inst& inst, Args&&... 
args) { @@ -56,8 +58,8 @@ public: std::string code; RegAlloc reg_alloc{*this}; - const Info& info; const Profile& profile; + const RuntimeInfo& runtime_info; std::vector texture_buffer_bindings; std::vector image_buffer_bindings; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index f110fd7f8..edff04a44 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -374,8 +374,9 @@ std::string_view GetTessSpacing(TessSpacing spacing) { } } // Anonymous namespace -std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bindings) { - EmitContext ctx{program, bindings, profile}; +std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, + Bindings& bindings) { + EmitContext ctx{program, bindings, profile, runtime_info}; Precolor(ctx, program); EmitCode(ctx, program); std::string header{StageHeader(program.stage)}; @@ -385,18 +386,18 @@ std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bi header += fmt::format("VERTICES_OUT {};", program.invocations); break; case Stage::TessellationEval: - header += - fmt::format("TESS_MODE {};" - "TESS_SPACING {};" - "TESS_VERTEX_ORDER {};", - GetTessMode(profile.tess_primitive), GetTessSpacing(profile.tess_spacing), - profile.tess_clockwise ? "CW" : "CCW"); + header += fmt::format("TESS_MODE {};" + "TESS_SPACING {};" + "TESS_VERTEX_ORDER {};", + GetTessMode(runtime_info.tess_primitive), + GetTessSpacing(runtime_info.tess_spacing), + runtime_info.tess_clockwise ? 
"CW" : "CCW"); break; case Stage::Geometry: header += fmt::format("PRIMITIVE_IN {};" "PRIMITIVE_OUT {};" "VERTICES_OUT {};", - InputPrimitive(profile.input_topology), + InputPrimitive(runtime_info.input_topology), OutputPrimitive(program.output_topology), program.output_vertices); break; case Stage::Compute: diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index a0dfdd818..3d02d873e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -12,12 +12,12 @@ namespace Shader::Backend::GLASM { -[[nodiscard]] std::string EmitGLASM(const Profile& profile, IR::Program& program, - Bindings& binding); +[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); [[nodiscard]] inline std::string EmitGLASM(const Profile& profile, IR::Program& program) { Bindings binding; - return EmitGLASM(profile, program, binding); + return EmitGLASM(profile, {}, program, binding); } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index a98e08392..3e8899f53 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -136,7 +136,7 @@ Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, break; case Stage::Geometry: if (per_invocation) { - const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; + const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)}; type = ctx.TypeArray(type, ctx.Const(num_vertices)); } break; @@ -161,8 +161,8 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional invo while (element < 4) { const u32 remainder{4 - element}; const TransformFeedbackVarying* xfb_varying{}; - if (!ctx.profile.xfb_varyings.empty()) { - xfb_varying = 
&ctx.profile.xfb_varyings[base_attr_index + element]; + if (!ctx.runtime_info.xfb_varyings.empty()) { + xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element]; xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; } const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; @@ -208,7 +208,7 @@ Id GetAttributeType(EmitContext& ctx, AttributeType type) { } std::optional AttrTypes(EmitContext& ctx, u32 index) { - const AttributeType type{ctx.profile.generic_input_types.at(index)}; + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; switch (type) { case AttributeType::Float: return AttrInfo{ctx.input_f32, ctx.F32[1], false}; @@ -441,13 +441,15 @@ void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_vie } } -EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& binding) - : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { +EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, + IR::Program& program, Bindings& bindings) + : Sirit::Module(profile_.supported_spirv), profile{profile_}, + runtime_info{runtime_info_}, stage{program.stage} { const bool is_unified{profile.unified_descriptor_binding}; - u32& uniform_binding{is_unified ? binding.unified : binding.uniform_buffer}; - u32& storage_binding{is_unified ? binding.unified : binding.storage_buffer}; - u32& texture_binding{is_unified ? binding.unified : binding.texture}; - u32& image_binding{is_unified ? binding.unified : binding.image}; + u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; + u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; + u32& texture_binding{is_unified ? bindings.unified : bindings.texture}; + u32& image_binding{is_unified ? 
bindings.unified : bindings.image}; AddCapability(spv::Capability::Shader); DefineCommonTypes(program.info); DefineCommonConstants(); @@ -1211,7 +1213,7 @@ void EmitContext::DefineInputs(const Info& info) { if (!generic.used) { continue; } - const AttributeType input_type{profile.generic_input_types[index]}; + const AttributeType input_type{runtime_info.generic_input_types[index]}; if (input_type == AttributeType::Disabled) { continue; } @@ -1256,7 +1258,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { if (info.stores_position || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } - if (info.stores_point_size || profile.fixed_state_point_size) { + if (info.stores_point_size || runtime_info.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index d2b79f6c1..961c9180c 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -103,7 +103,8 @@ struct GenericElementInfo { class EmitContext final : public Sirit::Module { public: - explicit EmitContext(const Profile& profile, IR::Program& program, Bindings& binding); + explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& binding); ~EmitContext(); [[nodiscard]] Id Def(const IR::Value& value); @@ -150,6 +151,7 @@ public: } const Profile& profile; + const RuntimeInfo& runtime_info; Stage stage{}; Id void_id{}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 3e20ac3b9..cba420cda 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -226,16 +226,17 @@ void DefineEntryPoint(const IR::Program& 
program, EmitContext& ctx, Id main) { case Stage::TessellationEval: execution_model = spv::ExecutionModel::TessellationEvaluation; ctx.AddCapability(spv::Capability::Tessellation); - ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_primitive)); - ctx.AddExecutionMode(main, ExecutionMode(ctx.profile.tess_spacing)); - ctx.AddExecutionMode(main, ctx.profile.tess_clockwise ? spv::ExecutionMode::VertexOrderCw - : spv::ExecutionMode::VertexOrderCcw); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive)); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing)); + ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise + ? spv::ExecutionMode::VertexOrderCw + : spv::ExecutionMode::VertexOrderCcw); break; case Stage::Geometry: execution_model = spv::ExecutionModel::Geometry; ctx.AddCapability(spv::Capability::Geometry); ctx.AddCapability(spv::Capability::GeometryStreams); - switch (ctx.profile.input_topology) { + switch (ctx.runtime_info.input_topology) { case InputTopology::Points: ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); break; @@ -279,7 +280,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { if (program.info.stores_frag_depth) { ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } - if (ctx.profile.force_early_z) { + if (ctx.runtime_info.force_early_z) { ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); } break; @@ -402,7 +403,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct if (info.uses_sample_id) { ctx.AddCapability(spv::Capability::SampleRateShading); } - if (!ctx.profile.xfb_varyings.empty()) { + if (!ctx.runtime_info.xfb_varyings.empty()) { ctx.AddCapability(spv::Capability::TransformFeedback); } if (info.uses_derivatives) { @@ -433,8 +434,9 @@ void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { } } // Anonymous namespace -std::vector EmitSPIRV(const Profile& profile, IR::Program& 
program, Bindings& binding) { - EmitContext ctx{profile, program, binding}; +std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings) { + EmitContext ctx{profile, runtime_info, program, bindings}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); if (profile.support_float_controls) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index d8ab2d8ed..db0c935fe 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -16,12 +16,12 @@ namespace Shader::Backend::SPIRV { -[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, IR::Program& program, - Bindings& binding); +[[nodiscard]] std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program, Bindings& bindings); [[nodiscard]] inline std::vector EmitSPIRV(const Profile& profile, IR::Program& program) { Bindings binding; - return EmitSPIRV(profile, program, binding); + return EmitSPIRV(profile, {}, program, binding); } } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 8e57ff070..c1b69c234 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -17,7 +17,7 @@ struct AttrInfo { }; std::optional AttrTypes(EmitContext& ctx, u32 index) { - const AttributeType type{ctx.profile.generic_input_types.at(index)}; + const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; switch (type) { case AttributeType::Float: return AttrInfo{ctx.input_f32, ctx.F32[1], false}; @@ -468,7 +468,7 @@ Id EmitIsHelperInvocation(EmitContext& ctx) { } Id EmitYDirection(EmitContext& ctx) { - return ctx.Const(ctx.profile.y_negate ? 
-1.0f : 1.0f); + return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f); } Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index ba948f3c9..072a3b1bd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -18,8 +18,8 @@ void ConvertDepthMode(EmitContext& ctx) { } void SetFixedPipelinePointSize(EmitContext& ctx) { - if (ctx.profile.fixed_state_point_size) { - const float point_size{*ctx.profile.fixed_state_point_size}; + if (ctx.runtime_info.fixed_state_point_size) { + const float point_size{*ctx.runtime_info.fixed_state_point_size}; ctx.OpStore(ctx.output_point_size, ctx.Const(point_size)); } } @@ -62,7 +62,10 @@ Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1 } void AlphaTest(EmitContext& ctx) { - const auto comparison{*ctx.profile.alpha_test_func}; + if (!ctx.runtime_info.alpha_test_func) { + return; + } + const auto comparison{*ctx.runtime_info.alpha_test_func}; if (comparison == CompareFunction::Always) { return; } @@ -76,7 +79,7 @@ void AlphaTest(EmitContext& ctx) { const Id true_label{ctx.OpLabel()}; const Id discard_label{ctx.OpLabel()}; - const Id alpha_reference{ctx.Const(ctx.profile.alpha_test_reference)}; + const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)}; const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone); @@ -113,7 +116,7 @@ void EmitPrologue(EmitContext& ctx) { } void EmitEpilogue(EmitContext& ctx) { - if (ctx.stage == Stage::VertexB && ctx.profile.convert_depth_mode) { + if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) { ConvertDepthMode(ctx); } if (ctx.stage == Stage::Fragment) { @@ -122,7 +125,7 @@ void EmitEpilogue(EmitContext& ctx) { } void 
EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { - if (ctx.profile.convert_depth_mode) { + if (ctx.runtime_info.convert_depth_mode) { ConvertDepthMode(ctx); } if (stream.IsImmediate()) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 12699511a..c46452c3d 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -81,19 +81,22 @@ struct Profile { bool support_viewport_mask{}; bool support_typeless_image_loads{}; bool support_demote_to_helper_invocation{}; - bool warp_size_potentially_larger_than_guest{}; bool support_int64_atomics{}; + + bool warp_size_potentially_larger_than_guest{}; bool lower_left_origin_mode{}; - // FClamp is broken and OpFMax + OpFMin should be used instead + /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; - // Offset image operands with an unsigned type do not work + /// Offset image operands with an unsigned type do not work bool has_broken_unsigned_image_offsets{}; - // Signed instructions with unsigned data types are misinterpreted + /// Signed instructions with unsigned data types are misinterpreted bool has_broken_signed_operations{}; - // Ignores SPIR-V ordered vs unordered using GLSL semantics + /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; +}; +struct RuntimeInfo { std::array generic_input_types{}; bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b84b36b9d..d7efbdd01 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -61,33 +61,15 @@ const Shader::Profile profile{ .support_viewport_mask = true, .support_typeless_image_loads = true, .support_demote_to_helper_invocation = false, - .warp_size_potentially_larger_than_guest = true, .support_int64_atomics = false, + + 
.warp_size_potentially_larger_than_guest = true, .lower_left_origin_mode = true, .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, .ignore_nan_fp_comparisons = true, - - .generic_input_types = {}, - .convert_depth_mode = false, - .force_early_z = false, - - .tess_primitive = {}, - .tess_spacing = {}, - .tess_clockwise = false, - - .input_topology = Shader::InputTopology::Triangles, - - .fixed_state_point_size = std::nullopt, - - .alpha_test_func = Shader::CompareFunction::Always, - .alpha_test_reference = 0.0f, - - .y_negate = false, - - .xfb_varyings = {}, }; using Shader::Backend::GLASM::EmitGLASM; @@ -302,10 +284,10 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program, binding)}; + const std::string code{EmitGLASM(profile, {}, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector code{EmitSPIRV(profile, program, binding)}; + const std::vector code{EmitSPIRV(profile, {}, program, binding)}; AddShader(Stage(stage_index), source_program.handle, code); } } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7830c0194..88db10b75 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -89,6 +89,208 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); return {}; } + +static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { + if (attr.enabled == 0) { + return Shader::AttributeType::Disabled; + } + switch (attr.Type()) { + case Maxwell::VertexAttribute::Type::SignedNorm: + case 
Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: + return Shader::AttributeType::Float; + case Maxwell::VertexAttribute::Type::SignedInt: + return Shader::AttributeType::SignedInt; + case Maxwell::VertexAttribute::Type::UnsignedInt: + return Shader::AttributeType::UnsignedInt; + } + return Shader::AttributeType::Float; +} + +std::vector MakeTransformFeedbackVaryings( + const GraphicsPipelineCacheKey& key) { + static constexpr std::array VECTORS{ + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] + }; + std::vector xfb(256); + for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { + const auto& locations = key.state.xfb_state.varyings[buffer]; + const auto& layout = key.state.xfb_state.layouts[buffer]; + const u32 varying_count = layout.varying_count; + u32 highest = 0; + for (u32 offset = 0; offset < varying_count; ++offset) { + const u32 base_offset = offset; + const u8 location 
= locations[offset]; + + Shader::TransformFeedbackVarying varying; + varying.buffer = layout.stream; + varying.stride = layout.stride; + varying.offset = offset * 4; + varying.components = 1; + + if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + xfb[location] = varying; + highest = std::max(highest, (base_offset + varying.components) * 4); + } + UNIMPLEMENTED_IF(highest != layout.stride); + } + return xfb; +} + +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, + const Shader::IR::Program& program) { + Shader::RuntimeInfo info; + + const Shader::Stage stage{program.stage}; + const bool has_geometry{key.unique_hashes[4] != 0}; + const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; + const float point_size{Common::BitCast(key.state.point_size)}; + switch (stage) { + case Shader::Stage::VertexB: + if (!has_geometry) { + if (key.state.topology == Maxwell::PrimitiveTopology::Points) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = MakeTransformFeedbackVaryings(key); + } + info.convert_depth_mode = gl_ndc; + } + std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), + &CastAttributeType); + break; + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... 
+ info.tess_clockwise = key.state.tessellation_clockwise == 0; + info.tess_primitive = [&key] { + const u32 raw{key.state.tessellation_primitive.Value()}; + switch (static_cast(raw)) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + const u32 raw{key.state.tessellation_spacing}; + switch (static_cast(raw)) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Geometry: + if (program.output_topology == Shader::OutputTopology::PointList) { + info.fixed_state_point_size = point_size; + } + if (key.state.xfb_enabled != 0) { + info.xfb_varyings = MakeTransformFeedbackVaryings(key); + } + info.convert_depth_mode = gl_ndc; + break; + case Shader::Stage::Fragment: + info.alpha_test_func = MaxwellToCompareFunction( + key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); + info.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); + break; + default: + break; + } + switch (key.state.topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case 
Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; + } + info.force_early_z = key.state.early_z != 0; + info.y_negate = key.state.y_negate != 0; + return info; +} } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -124,7 +326,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; - base_profile = Shader::Profile{ + profile = Shader::Profile{ .supported_spirv = device.IsKhrSpirv1_4Supported() ? 
0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = true, @@ -153,14 +355,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_viewport_mask = device.IsNvViewportArray2Supported(), .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, - .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, - .generic_input_types{}, - .fixed_state_point_size{}, - .alpha_test_func{}, - .xfb_varyings{}, }; } @@ -329,8 +527,8 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::Profile profile{MakeProfile(key, program)}; - const std::vector code{EmitSPIRV(profile, program, binding)}; + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { @@ -391,7 +589,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; - const std::vector code{EmitSPIRV(base_profile, program)}; + const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { @@ -403,206 +601,4 @@ std::unique_ptr PipelineCache::CreateComputePipeline( thread_worker, program.info, std::move(spv_module)); 
} -static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { - if (attr.enabled == 0) { - return Shader::AttributeType::Disabled; - } - switch (attr.Type()) { - case Maxwell::VertexAttribute::Type::SignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedNorm: - case Maxwell::VertexAttribute::Type::UnsignedScaled: - case Maxwell::VertexAttribute::Type::SignedScaled: - case Maxwell::VertexAttribute::Type::Float: - return Shader::AttributeType::Float; - case Maxwell::VertexAttribute::Type::SignedInt: - return Shader::AttributeType::SignedInt; - case Maxwell::VertexAttribute::Type::UnsignedInt: - return Shader::AttributeType::UnsignedInt; - } - return Shader::AttributeType::Float; -} - -static std::vector MakeTransformFeedbackVaryings( - const GraphicsPipelineCacheKey& key) { - static constexpr std::array VECTORS{ - 28, // gl_Position - 32, // Generic 0 - 36, // Generic 1 - 40, // Generic 2 - 44, // Generic 3 - 48, // Generic 4 - 52, // Generic 5 - 56, // Generic 6 - 60, // Generic 7 - 64, // Generic 8 - 68, // Generic 9 - 72, // Generic 10 - 76, // Generic 11 - 80, // Generic 12 - 84, // Generic 13 - 88, // Generic 14 - 92, // Generic 15 - 96, // Generic 16 - 100, // Generic 17 - 104, // Generic 18 - 108, // Generic 19 - 112, // Generic 20 - 116, // Generic 21 - 120, // Generic 22 - 124, // Generic 23 - 128, // Generic 24 - 132, // Generic 25 - 136, // Generic 26 - 140, // Generic 27 - 144, // Generic 28 - 148, // Generic 29 - 152, // Generic 30 - 156, // Generic 31 - 160, // gl_FrontColor - 164, // gl_FrontSecondaryColor - 160, // gl_BackColor - 164, // gl_BackSecondaryColor - 192, // gl_TexCoord[0] - 196, // gl_TexCoord[1] - 200, // gl_TexCoord[2] - 204, // gl_TexCoord[3] - 208, // gl_TexCoord[4] - 212, // gl_TexCoord[5] - 216, // gl_TexCoord[6] - 220, // gl_TexCoord[7] - }; - std::vector xfb(256); - for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { - const auto& locations = 
key.state.xfb_state.varyings[buffer]; - const auto& layout = key.state.xfb_state.layouts[buffer]; - const u32 varying_count = layout.varying_count; - u32 highest = 0; - for (u32 offset = 0; offset < varying_count; ++offset) { - const u32 base_offset = offset; - const u8 location = locations[offset]; - - Shader::TransformFeedbackVarying varying; - varying.buffer = layout.stream; - varying.stride = layout.stride; - varying.offset = offset * 4; - varying.components = 1; - - if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) { - UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); - - const u8 base_index = location / 4; - while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { - ++offset; - ++varying.components; - } - } - xfb[location] = varying; - highest = std::max(highest, (base_offset + varying.components) * 4); - } - UNIMPLEMENTED_IF(highest != layout.stride); - } - return xfb; -} - -Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program) { - Shader::Profile profile{base_profile}; - - const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; - const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; - const float point_size{Common::BitCast(key.state.point_size)}; - switch (stage) { - case Shader::Stage::VertexB: - if (!has_geometry) { - if (key.state.topology == Maxwell::PrimitiveTopology::Points) { - profile.fixed_state_point_size = point_size; - } - if (key.state.xfb_enabled != 0) { - profile.xfb_varyings = MakeTransformFeedbackVaryings(key); - } - profile.convert_depth_mode = gl_ndc; - } - std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), - &CastAttributeType); - break; - case Shader::Stage::TessellationEval: - // We have to flip tessellation clockwise for some reason... 
- profile.tess_clockwise = key.state.tessellation_clockwise == 0; - profile.tess_primitive = [&key] { - const u32 raw{key.state.tessellation_primitive.Value()}; - switch (static_cast(raw)) { - case Maxwell::TessellationPrimitive::Isolines: - return Shader::TessPrimitive::Isolines; - case Maxwell::TessellationPrimitive::Triangles: - return Shader::TessPrimitive::Triangles; - case Maxwell::TessellationPrimitive::Quads: - return Shader::TessPrimitive::Quads; - } - UNREACHABLE(); - return Shader::TessPrimitive::Triangles; - }(); - profile.tess_spacing = [&] { - const u32 raw{key.state.tessellation_spacing}; - switch (static_cast(raw)) { - case Maxwell::TessellationSpacing::Equal: - return Shader::TessSpacing::Equal; - case Maxwell::TessellationSpacing::FractionalOdd: - return Shader::TessSpacing::FractionalOdd; - case Maxwell::TessellationSpacing::FractionalEven: - return Shader::TessSpacing::FractionalEven; - } - UNREACHABLE(); - return Shader::TessSpacing::Equal; - }(); - break; - case Shader::Stage::Geometry: - if (program.output_topology == Shader::OutputTopology::PointList) { - profile.fixed_state_point_size = point_size; - } - if (key.state.xfb_enabled != 0) { - profile.xfb_varyings = MakeTransformFeedbackVaryings(key); - } - profile.convert_depth_mode = gl_ndc; - break; - case Shader::Stage::Fragment: - profile.alpha_test_func = MaxwellToCompareFunction( - key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); - profile.alpha_test_reference = Common::BitCast(key.state.alpha_test_ref); - break; - default: - break; - } - switch (key.state.topology) { - case Maxwell::PrimitiveTopology::Points: - profile.input_topology = Shader::InputTopology::Points; - break; - case Maxwell::PrimitiveTopology::Lines: - case Maxwell::PrimitiveTopology::LineLoop: - case Maxwell::PrimitiveTopology::LineStrip: - profile.input_topology = Shader::InputTopology::Lines; - break; - case Maxwell::PrimitiveTopology::Triangles: - case Maxwell::PrimitiveTopology::TriangleStrip: - 
case Maxwell::PrimitiveTopology::TriangleFan: - case Maxwell::PrimitiveTopology::Quads: - case Maxwell::PrimitiveTopology::QuadStrip: - case Maxwell::PrimitiveTopology::Polygon: - case Maxwell::PrimitiveTopology::Patches: - profile.input_topology = Shader::InputTopology::Triangles; - break; - case Maxwell::PrimitiveTopology::LinesAdjacency: - case Maxwell::PrimitiveTopology::LineStripAdjacency: - profile.input_topology = Shader::InputTopology::LinesAdjacency; - break; - case Maxwell::PrimitiveTopology::TrianglesAdjacency: - case Maxwell::PrimitiveTopology::TriangleStripAdjacency: - profile.input_topology = Shader::InputTopology::TrianglesAdjacency; - break; - } - profile.force_early_z = key.state.early_z != 0; - profile.y_negate = key.state.y_negate != 0; - return profile; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 4e48b4956..4116cc73f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -129,9 +129,6 @@ private: Shader::Environment& env, bool build_in_parallel); - Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program); - const Device& device; VKScheduler& scheduler; DescriptorPool& descriptor_pool; @@ -148,7 +145,7 @@ private: ShaderPools main_pools; - Shader::Profile base_profile; + Shader::Profile profile; std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; -- cgit v1.2.3 From c07cc9d6a560d14e25ec59974ae5a15a7842d779 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 02:57:52 -0300 Subject: gl_shader_cache: Pass shader runtime information --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 76 +++++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d7efbdd01..b4f26dd74 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -184,6 +184,76 @@ GLenum AssemblyStage(size_t stage_index) { UNREACHABLE_MSG("{}", stage_index); return GL_NONE; } + +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, + const Shader::IR::Program& program) { + UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); + + Shader::RuntimeInfo info; + switch (program.stage) { + case Shader::Stage::TessellationEval: + // We have to flip tessellation clockwise for some reason... + info.tess_clockwise = key.tessellation_clockwise == 0; + info.tess_primitive = [&key] { + switch (key.tessellation_primitive) { + case Maxwell::TessellationPrimitive::Isolines: + return Shader::TessPrimitive::Isolines; + case Maxwell::TessellationPrimitive::Triangles: + return Shader::TessPrimitive::Triangles; + case Maxwell::TessellationPrimitive::Quads: + return Shader::TessPrimitive::Quads; + } + UNREACHABLE(); + return Shader::TessPrimitive::Triangles; + }(); + info.tess_spacing = [&] { + switch (key.tessellation_spacing) { + case Maxwell::TessellationSpacing::Equal: + return Shader::TessSpacing::Equal; + case Maxwell::TessellationSpacing::FractionalOdd: + return Shader::TessSpacing::FractionalOdd; + case Maxwell::TessellationSpacing::FractionalEven: + return Shader::TessSpacing::FractionalEven; + } + UNREACHABLE(); + return Shader::TessSpacing::Equal; + }(); + break; + case Shader::Stage::Geometry: + + break; + default: + break; + } + switch (key.gs_input_topology) { + case Maxwell::PrimitiveTopology::Points: + info.input_topology = Shader::InputTopology::Points; + break; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineLoop: + case Maxwell::PrimitiveTopology::LineStrip: + info.input_topology = Shader::InputTopology::Lines; + break; + case Maxwell::PrimitiveTopology::Triangles: + 
case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + case Maxwell::PrimitiveTopology::Quads: + case Maxwell::PrimitiveTopology::QuadStrip: + case Maxwell::PrimitiveTopology::Polygon: + case Maxwell::PrimitiveTopology::Patches: + info.input_topology = Shader::InputTopology::Triangles; + break; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + info.input_topology = Shader::InputTopology::LinesAdjacency; + break; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + info.input_topology = Shader::InputTopology::TrianglesAdjacency; + break; + } + return info; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -283,11 +353,13 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( Shader::IR::Program& program{programs[index]}; const size_t stage_index{index - 1}; infos[stage_index] = &program.info; + + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, {}, program, binding)}; + const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector code{EmitSPIRV(profile, {}, program, binding)}; + const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; AddShader(Stage(stage_index), source_program.handle, code); } } -- cgit v1.2.3 From 6bc54e12a0d274beee0cb7584f73429112ec98b2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 18:17:53 -0300 Subject: glasm: Set transform feedback state --- .../renderer_opengl/gl_graphics_program.cpp | 90 +++++++++++++++++++- .../renderer_opengl/gl_graphics_program.h | 32 ++++--- src/video_core/renderer_opengl/gl_rasterizer.cpp | 98 ++-------------------- 
src/video_core/renderer_opengl/gl_rasterizer.h | 6 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 19 ++++- 5 files changed, 132 insertions(+), 113 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index b5d75aa13..9677a3ed6 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -12,7 +12,7 @@ #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { - +namespace { using Shader::ImageBufferDescriptor; using Tegra::Texture::TexturePair; using VideoCommon::ImageId; @@ -20,6 +20,35 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; +/// Translates hardware transform feedback indices +/// @param location Hardware location +/// @return Pair of ARB_transform_feedback3 token stream first and third arguments +/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt +std::pair TransformFeedbackEnum(u8 location) { + const u8 index = location / 4; + if (index >= 8 && index <= 39) { + return {GL_GENERIC_ATTRIB_NV, index - 8}; + } + if (index >= 48 && index <= 55) { + return {GL_TEXTURE_COORD_NV, index - 48}; + } + switch (index) { + case 7: + return {GL_POSITION, 0}; + case 40: + return {GL_PRIMARY_COLOR_NV, 0}; + case 41: + return {GL_SECONDARY_COLOR_NV, 0}; + case 42: + return {GL_BACK_PRIMARY_COLOR_NV, 0}; + case 43: + return {GL_BACK_SECONDARY_COLOR_NV, 0}; + } + UNIMPLEMENTED_MSG("index={}", index); + return {GL_POSITION, 0}; +} +} // Anonymous namespace + size_t GraphicsProgramKey::Hash() const noexcept { return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); } @@ -34,7 +63,8 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, 
std::array assembly_programs_, - const std::array& infos) + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( @@ -74,6 +104,10 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); + + if (assembly_programs[0].handle != 0 && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } } struct Spec { @@ -302,4 +336,56 @@ void GraphicsProgram::Configure(bool is_indexed) { } } +void GraphicsProgram::GenerateTransformFeedbackState( + const VideoCommon::TransformFeedbackState& xfb_state) { + // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal + // when this is required. + const auto& regs{maxwell3d.regs}; + + GLint* cursor{xfb_attribs.data()}; + GLint* current_stream{xfb_streams.data()}; + + for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { + const auto& layout = regs.tfb_layouts[feedback]; + UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); + if (layout.varying_count == 0) { + continue; + } + *current_stream = static_cast(feedback); + if (current_stream != xfb_streams.data()) { + // When stepping one stream, push the expected token + cursor[0] = GL_NEXT_BUFFER_NV; + cursor[1] = 0; + cursor[2] = 0; + cursor += XFB_ENTRY_STRIDE; + } + ++current_stream; + + const auto& locations = regs.tfb_varying_locs[feedback]; + std::optional current_index; + for (u32 offset = 0; offset < layout.varying_count; ++offset) { + const u8 location = locations[offset]; + const u8 index = location / 4; + + if (current_index == index) { + // Increase number of components of the previous attachment + ++cursor[-2]; 
+ continue; + } + current_index = index; + + std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); + cursor[1] = 1; + cursor += XFB_ENTRY_STRIDE; + } + } + num_xfb_attribs = static_cast((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); + num_xfb_strides = static_cast(current_stream - xfb_streams.data()); +} + +void GraphicsProgram::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h index 18292bb16..53a57ede5 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ b/src/video_core/renderer_opengl/gl_graphics_program.h @@ -16,6 +16,7 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/transform_feedback.h" namespace OpenGL { @@ -24,16 +25,6 @@ class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct GraphicsProgramKey { - struct TransformFeedbackState { - struct Layout { - u32 stream; - u32 varying_count; - u32 stride; - }; - std::array layouts; - std::array, Maxwell::NumTransformFeedbackBuffers> varyings; - }; - std::array unique_hashes; union { u32 raw; @@ -45,7 +36,7 @@ struct GraphicsProgramKey { BitField<10, 1, u32> tessellation_clockwise; }; std::array padding; - TransformFeedbackState xfb_state; + VideoCommon::TransformFeedbackState xfb_state; size_t Hash() const noexcept; @@ -75,11 +66,22 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, std::array assembly_programs_, - const std::array& infos); + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state); void Configure(bool is_indexed); + void ConfigureTransformFeedback() const { 
+ if (num_xfb_attribs != 0) { + ConfigureTransformFeedbackImpl(); + } + } + private: + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + + void ConfigureTransformFeedbackImpl() const; + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -96,6 +98,12 @@ private: std::array base_storage_bindings{}; std::array num_texture_buffers{}; std::array num_image_buffers{}; + + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; + GLsizei num_xfb_attribs{}; + GLsizei num_xfb_strides{}; + std::array xfb_attribs{}; + std::array xfb_streams{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4834d58f0..51ff42ee9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -51,37 +51,8 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); namespace { - constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; -/// Translates hardware transform feedback indices -/// @param location Hardware location -/// @return Pair of ARB_transform_feedback3 token stream first and third arguments -/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt -std::pair TransformFeedbackEnum(u8 location) { - const u8 index = location / 4; - if (index >= 8 && index <= 39) { - return {GL_GENERIC_ATTRIB_NV, index - 8}; - } - if (index >= 48 && index <= 55) { - return {GL_TEXTURE_COORD_NV, index - 48}; - } - switch (index) { - case 7: - return {GL_POSITION, 0}; - case 40: - return {GL_PRIMARY_COLOR_NV, 0}; - case 41: - return {GL_SECONDARY_COLOR_NV, 0}; - case 42: - return {GL_BACK_PRIMARY_COLOR_NV, 0}; - case 43: - return {GL_BACK_SECONDARY_COLOR_NV, 0}; - } - UNIMPLEMENTED_MSG("index={}", index); - return 
{GL_POSITION, 0}; -} - void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } @@ -253,7 +224,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { program->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - BeginTransformFeedback(primitive_mode); + BeginTransformFeedback(program, primitive_mode); const GLuint base_instance = static_cast(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = @@ -1025,68 +996,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } -void RasterizerOpenGL::SyncTransformFeedback() { - // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal - // when this is required. - const auto& regs = maxwell3d.regs; - - static constexpr std::size_t STRIDE = 3; - std::array attribs; - std::array streams; - - GLint* cursor = attribs.data(); - GLint* current_stream = streams.data(); - - for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; - UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); - if (layout.varying_count == 0) { - continue; - } - - *current_stream = static_cast(feedback); - if (current_stream != streams.data()) { - // When stepping one stream, push the expected token - cursor[0] = GL_NEXT_BUFFER_NV; - cursor[1] = 0; - cursor[2] = 0; - cursor += STRIDE; - } - ++current_stream; - - const auto& locations = regs.tfb_varying_locs[feedback]; - std::optional current_index; - for (u32 offset = 0; offset < layout.varying_count; ++offset) { - const u8 location = locations[offset]; - const u8 index = location / 4; - - if (current_index == index) { - // Increase number of components of the previous attachment - ++cursor[-2]; - continue; - } - current_index = index; - - std::tie(cursor[0], cursor[2]) = 
TransformFeedbackEnum(location); - cursor[1] = 1; - cursor += STRIDE; - } - } - - const GLsizei num_attribs = static_cast((cursor - attribs.data()) / STRIDE); - const GLsizei num_strides = static_cast(current_stream - streams.data()); - glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(), - GL_INTERLEAVED_ATTRIBS); -} - -void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { +void RasterizerOpenGL::BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode) { const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } - if (device.UseAssemblyShaders()) { - SyncTransformFeedback(); - } + program->ConfigureTransformFeedback(); + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); @@ -1100,11 +1016,9 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { } void RasterizerOpenGL::EndTransformFeedback() { - const auto& regs = maxwell3d.regs; - if (regs.tfb_enabled == 0) { - return; + if (maxwell3d.regs.tfb_enabled != 0) { + glEndTransformFeedback(); } - glEndTransformFeedback(); } AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2fdcbe4ba..08f509c19 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -193,12 +193,8 @@ private: /// Syncs vertex instances to match the guest state void SyncVertexInstances(); - /// Syncs transform feedback state to match guest state - /// @note Only valid on assembly shaders - void SyncTransformFeedback(); - /// Begin a transform feedback - void BeginTransformFeedback(GLenum primitive_mode); + void BeginTransformFeedback(GraphicsProgram* program, GLenum 
primitive_mode); /// End a transform feedback void EndTransformFeedback(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b4f26dd74..0a0f1324f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -254,6 +254,17 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, } return info; } + +void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { + std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { + return VideoCommon::TransformFeedbackState::Layout{ + .stream = layout.stream, + .varying_count = layout.varying_count, + .stride = layout.stride, + }; + }); + state.varyings = regs.tfb_varying_locs; +} } // Anonymous namespace ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, @@ -282,7 +293,10 @@ GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); - + graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0); + if (graphics_key.xfb_enabled) { + SetXfbState(graphics_key.xfb_state, regs); + } const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& program{pair->second}; if (is_new) { @@ -368,7 +382,8 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( } return std::make_unique( texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - std::move(source_program), std::move(assembly_programs), infos); + std::move(source_program), std::move(assembly_programs), infos, + key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); } std::unique_ptr ShaderCache::CreateComputeProgram( -- cgit v1.2.3 From 84feabac881443d27f84f8fec5eba6dc3b13b620 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 18:27:37 -0300 Subject: glasm: Implement forced early Z --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 8 ++++++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index edff04a44..0c2bbf284 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -261,7 +261,8 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } } -void SetupOptions(const IR::Program& program, const Profile& profile, std::string& header) { +void SetupOptions(const IR::Program& program, const Profile& profile, + const RuntimeInfo& runtime_info, std::string& header) { const Info& info{program.info}; const Stage stage{program.stage}; @@ -296,6 +297,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile, std::strin header += "OPTION NV_viewport_array2;"; } } + if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { + header += "OPTION NV_early_fragment_tests;"; + } const auto non_zero_frag_colors{info.stores_frag_color | std::views::drop(1)}; if (std::ranges::find(non_zero_frag_colors, true) != non_zero_frag_colors.end()) { header += "OPTION ARB_draw_buffers;"; @@ -380,7 +384,7 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I Precolor(ctx, program); EmitCode(ctx, program); std::string header{StageHeader(program.stage)}; - SetupOptions(program, profile, header); + SetupOptions(program, profile, runtime_info, header); switch (program.stage) { case Stage::TessellationControl: header += fmt::format("VERTICES_OUT {};", 
program.invocations); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0a0f1324f..e678b4bb2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -219,8 +219,8 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, return Shader::TessSpacing::Equal; }(); break; - case Shader::Stage::Geometry: - + case Shader::Stage::Fragment: + info.force_early_z = key.early_z != 0; break; default: break; -- cgit v1.2.3 From df406246d9117ba1c428d81ba7466ba0291ece3c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:36:30 -0300 Subject: gl_shader_cache: Improve GLASM error print logic --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e678b4bb2..747a133fb 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -140,13 +140,16 @@ OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { glGenProgramsARB(1, &program.handle); glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, static_cast(code.size()), code.data()); - if (!Settings::values.renderer_debug) { - return program; - } - const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - LOG_CRITICAL(Render_OpenGL, "{}", err); - LOG_INFO(Render_OpenGL, "{}", code); + if (Settings::values.renderer_debug) { + const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + if (std::strstr(err, "error")) { + LOG_CRITICAL(Render_OpenGL, "\n{}", err); + LOG_INFO(Render_OpenGL, "\n{}", code); + } else { + LOG_WARNING(Render_OpenGL, "\n{}", err); 
+ } + } } return program; } -- cgit v1.2.3 From c31521512fd49603ea42c93e2a6eac5d7985cd78 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:46:40 -0300 Subject: gl_shader_cache,glasm: Conditionally use typeless image reads extension --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 6 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 74 +++++++++++----------- src/video_core/renderer_opengl/gl_shader_cache.h | 2 + 3 files changed, 43 insertions(+), 39 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 0c2bbf284..8718cc7ec 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -271,8 +271,7 @@ void SetupOptions(const IR::Program& program, const Profile& profile, "OPTION NV_shader_storage_buffer;" "OPTION NV_gpu_program_fp64;" "OPTION NV_bindless_texture;" - "OPTION ARB_derivative_control;" - "OPTION EXT_shader_image_load_formatted;"; + "OPTION ARB_derivative_control;"; if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; } @@ -297,6 +296,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile, header += "OPTION NV_viewport_array2;"; } } + if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { + header += "OPTION EXT_shader_image_load_formatted;"; + } if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { header += "OPTION NV_early_fragment_tests;"; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 747a133fb..2c0510f11 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -36,42 +36,6 @@ namespace OpenGL { namespace { -// FIXME: Move this somewhere else -const Shader::Profile profile{ - .supported_spirv = 0x00010000, - - 
.unified_descriptor_binding = false, - .support_descriptor_aliasing = false, - .support_int8 = false, - .support_int16 = false, - .support_vertex_instance_id = true, - .support_float_controls = false, - .support_separate_denorm_behavior = false, - .support_separate_rounding_mode = false, - .support_fp16_denorm_preserve = false, - .support_fp32_denorm_preserve = false, - .support_fp16_denorm_flush = false, - .support_fp32_denorm_flush = false, - .support_fp16_signed_zero_nan_preserve = false, - .support_fp32_signed_zero_nan_preserve = false, - .support_fp64_signed_zero_nan_preserve = false, - .support_explicit_workgroup_layout = false, - .support_vote = true, - .support_viewport_index_layer_non_geometry = true, - .support_viewport_mask = true, - .support_typeless_image_loads = true, - .support_demote_to_helper_invocation = false, - .support_int64_atomics = false, - - .warp_size_potentially_larger_than_guest = true, - .lower_left_origin_mode = true, - - .has_broken_spirv_clamp = true, - .has_broken_unsigned_image_offsets = true, - .has_broken_signed_operations = true, - .ignore_nan_fp_comparisons = true, -}; - using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; @@ -279,7 +243,43 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ - state_tracker_} {} + state_tracker_} { + profile = Shader::Profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + 
.support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.HasNvViewportArray2() || device.HasVertexViewportLayer(), + .support_viewport_mask = true, + .support_typeless_image_loads = device.HasImageLoadFormatted(), + .support_demote_to_helper_invocation = false, + .support_int64_atomics = false, + + .warp_size_potentially_larger_than_guest = true, + .lower_left_origin_mode = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + }; +} ShaderCache::~ShaderCache() = default; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b479d073a..b49cd0ac7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -86,6 +86,8 @@ private: ShaderPools main_pools; std::unordered_map> graphics_cache; std::unordered_map> compute_cache; + + Shader::Profile profile; }; } // namespace OpenGL -- cgit v1.2.3 From 1bccb43cbecdbf069f5c86086670a8d5440408e3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 21 May 2021 20:47:48 -0300 Subject: gl_shader_cache: Conditionally use viewport mask --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2c0510f11..cf03280fa 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -266,7 +266,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_vote = true, .support_viewport_index_layer_non_geometry = device.HasNvViewportArray2() || device.HasVertexViewportLayer(), - .support_viewport_mask = true, + .support_viewport_mask = device.HasNvViewportArray2(), .support_typeless_image_loads = device.HasImageLoadFormatted(), .support_demote_to_helper_invocation = false, .support_int64_atomics = false, -- cgit v1.2.3 From 80884e32701e1e93fded045be4c235ff143d6ea0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 23 May 2021 21:24:24 -0300 Subject: gl_graphics_program: Fix texture buffer bindings --- .../renderer_opengl/gl_graphics_program.cpp | 59 +++++++++++++--------- 1 file changed, 35 insertions(+), 24 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp index 9677a3ed6..7c3d23f85 100644 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include #include #include "common/cityhash.h" @@ -14,12 +15,24 @@ namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; +using Shader::ImageDescriptor; +using Shader::TextureBufferDescriptor; +using Shader::TextureDescriptor; using Tegra::Texture::TexturePair; using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; +template +u32 AccumulateCount(Range&& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -77,30 +90,25 @@ GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buff } u32 num_textures{}; u32 num_images{}; - for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { const auto& info{stage_infos[stage]}; - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - for (const auto& desc : info.constant_buffer_descriptors) { - base_uniform_bindings[stage + 1] += desc.count; - } - for (const auto& desc : info.storage_buffers_descriptors) { - base_storage_bindings[stage + 1] += desc.count; - } - for (const auto& desc : info.texture_buffer_descriptors) { - num_texture_buffers[stage] += desc.count; - num_textures += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_image_buffers[stage] += desc.count; - num_images += desc.count; - } - for (const auto& desc : info.texture_descriptors) { - num_textures += desc.count; - } - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + 
base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); @@ -151,8 +159,8 @@ void GraphicsProgram::Configure(bool is_indexed) { const u32 index_offset{index << desc.size_shift}; const u32 offset{desc.cbuf_offset + index_offset}; const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; - if constexpr (std::is_same_v || - std::is_same_v) { + if constexpr (std::is_same_v || + std::is_same_v) { if (desc.has_secondary) { ASSERT(cbufs[desc.secondary_cbuf_index].enabled); const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; @@ -297,6 +305,9 @@ void GraphicsProgram::Configure(bool is_indexed) { texture_binding += num_texture_buffers[stage]; image_binding += num_image_buffers[stage]; + views_it += num_texture_buffers[stage]; + views_it += num_image_buffers[stage]; + const auto& info{stage_infos[stage]}; for (const auto& desc : info.texture_descriptors) { for (u32 index = 0; index < desc.count; ++index) { -- cgit v1.2.3 From 40179282137370380387cab2610dcf21bd709efa Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 24 May 2021 03:24:19 -0300 Subject: gl_shader_cache: Do not flip tessellation on OpenGL --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git 
a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cf03280fa..ceec83a8a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -159,8 +159,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: - // We have to flip tessellation clockwise for some reason... - info.tess_clockwise = key.tessellation_clockwise == 0; + info.tess_clockwise = key.tessellation_clockwise != 0; info.tess_primitive = [&key] { switch (key.tessellation_primitive) { case Maxwell::TessellationPrimitive::Isolines: -- cgit v1.2.3 From eacf18cce9a05a28f50e916a752c04b0c9337707 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 18:58:28 -0300 Subject: gl_shader_cache: Rename Program abstractions into Pipeline --- src/video_core/CMakeLists.txt | 8 +- .../renderer_opengl/gl_compute_pipeline.cpp | 182 ++++++++++ .../renderer_opengl/gl_compute_pipeline.h | 84 +++++ .../renderer_opengl/gl_compute_program.cpp | 182 ---------- .../renderer_opengl/gl_compute_program.h | 84 ----- .../renderer_opengl/gl_graphics_pipeline.cpp | 402 +++++++++++++++++++++ .../renderer_opengl/gl_graphics_pipeline.h | 118 ++++++ .../renderer_opengl/gl_graphics_program.cpp | 402 --------------------- .../renderer_opengl/gl_graphics_program.h | 118 ------ src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 +- src/video_core/renderer_opengl/gl_rasterizer.h | 2 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 42 +-- src/video_core/renderer_opengl/gl_shader_cache.h | 32 +- src/video_core/renderer_opengl/gl_shader_manager.h | 2 +- 14 files changed, 834 insertions(+), 834 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_compute_pipeline.cpp create mode 100644 src/video_core/renderer_opengl/gl_compute_pipeline.h delete mode 100644 
src/video_core/renderer_opengl/gl_compute_program.cpp delete mode 100644 src/video_core/renderer_opengl/gl_compute_program.h create mode 100644 src/video_core/renderer_opengl/gl_graphics_pipeline.cpp create mode 100644 src/video_core/renderer_opengl/gl_graphics_pipeline.h delete mode 100644 src/video_core/renderer_opengl/gl_graphics_program.cpp delete mode 100644 src/video_core/renderer_opengl/gl_graphics_program.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 8250f736c..1ef3a6189 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -67,14 +67,14 @@ add_library(video_core STATIC renderer_base.h renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h - renderer_opengl/gl_compute_program.cpp - renderer_opengl/gl_compute_program.h + renderer_opengl/gl_compute_pipeline.cpp + renderer_opengl/gl_compute_pipeline.h renderer_opengl/gl_device.cpp renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h - renderer_opengl/gl_graphics_program.cpp - renderer_opengl/gl_graphics_program.h + renderer_opengl/gl_graphics_pipeline.cpp + renderer_opengl/gl_graphics_pipeline.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp new file mode 100644 index 000000000..700ebd8b8 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -0,0 +1,182 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include + +#include "common/cityhash.h" +#include "video_core/renderer_opengl/gl_compute_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace OpenGL { + +using Shader::ImageBufferDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 16; + +size_t ComputePipelineKey::Hash() const noexcept { + return static_cast( + Common::CityHash64(reinterpret_cast(this), sizeof *this)); +} + +bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, sizeof *this) == 0; +} + +ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, + source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { + for (const auto& desc : info.texture_buffer_descriptors) { + num_texture_buffers += desc.count; + } + for (const auto& desc : info.image_buffer_descriptors) { + num_image_buffers += desc.count; + } + u32 num_textures = num_texture_buffers; + for (const auto& desc : info.texture_descriptors) { + num_textures += desc.count; + } + ASSERT(num_textures <= MAX_TEXTURES); + + u32 num_images = num_image_buffers; + for (const auto& desc : info.image_descriptors) { + num_images += desc.count; + } + ASSERT(num_images <= MAX_IMAGES); +} + +void ComputePipeline::Configure() { + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.UnbindComputeStorageBuffers(); + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { 
+ ASSERT(desc.count == 1); + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, + desc.is_written); + ++ssbo_index; + } + texture_cache.SynchronizeComputeDescriptors(); + + std::array image_view_ids; + boost::container::static_vector image_view_indices; + std::array samplers; + std::array textures; + std::array images; + GLsizei sampler_binding{}; + GLsizei texture_binding{}; + GLsizei image_binding{}; + + const auto& qmd{kepler_compute.launch_description}; + const auto& cbufs{qmd.const_buffer_config}; + const bool via_header_index{qmd.linked_tsc != 0}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); + const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + + secondary_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + return TexturePair(lhs_raw | rhs_raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + } + }}; + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + samplers[sampler_binding++] = 0; + } + } + std::ranges::for_each(info.image_buffer_descriptors, add_image); + for (const 
auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices.push_back(handle.first); + + Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); + samplers[sampler_binding++] = sampler->Handle(); + } + } + std::ranges::for_each(info.image_descriptors, add_image); + + const std::span indices_span(image_view_indices.data(), image_view_indices.size()); + texture_cache.FillComputeImageViews(indices_span, image_view_ids); + + if (assembly_program.handle != 0) { + program_manager.BindComputeAssemblyProgram(assembly_program.handle); + } else { + program_manager.BindProgram(source_program.handle); + } + buffer_cache.UnbindComputeTextureBuffers(); + size_t texbuf_index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; + buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++texbuf_index; + } + }}; + std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); + std::ranges::for_each(info.image_buffer_descriptors, add_buffer); + + buffer_cache.UpdateComputeBuffers(); + + buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); + buffer_cache.BindHostComputeBuffers(); + + const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; + texture_binding += num_texture_buffers; + image_binding += num_image_buffers; + + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for 
(const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h new file mode 100644 index 000000000..e3b94e2f3 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -0,0 +1,84 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines { +class KeplerCompute; +} + +namespace Shader { +struct Info; +} + +namespace OpenGL { + +class ProgramManager; + +struct ComputePipelineKey { + u64 unique_hash; + u32 shared_memory_size; + std::array workgroup_size; + + size_t Hash() const noexcept; + + bool operator==(const ComputePipelineKey&) const noexcept; + + bool operator!=(const ComputePipelineKey& rhs) const noexcept { + return !operator==(rhs); + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class ComputePipeline { +public: + explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& 
buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::KeplerCompute& kepler_compute_, + ProgramManager& program_manager_, const Shader::Info& info_, + OGLProgram source_program_, OGLAssemblyProgram assembly_program_); + + void Configure(); + +private: + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::KeplerCompute& kepler_compute; + ProgramManager& program_manager; + + Shader::Info info; + OGLProgram source_program; + OGLAssemblyProgram assembly_program; + + u32 num_texture_buffers{}; + u32 num_image_buffers{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp deleted file mode 100644 index ce52a0052..000000000 --- a/src/video_core/renderer_opengl/gl_compute_program.cpp +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include - -#include "common/cityhash.h" -#include "video_core/renderer_opengl/gl_compute_program.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" - -namespace OpenGL { - -using Shader::ImageBufferDescriptor; -using Tegra::Texture::TexturePair; -using VideoCommon::ImageId; - -constexpr u32 MAX_TEXTURES = 64; -constexpr u32 MAX_IMAGES = 16; - -size_t ComputeProgramKey::Hash() const noexcept { - return static_cast( - Common::CityHash64(reinterpret_cast(this), sizeof *this)); -} - -bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { - return std::memcmp(this, &rhs, sizeof *this) == 0; -} - -ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, - source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { - for (const auto& desc : info.texture_buffer_descriptors) { - num_texture_buffers += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_image_buffers += desc.count; - } - u32 num_textures = num_texture_buffers; - for (const auto& desc : info.texture_descriptors) { - num_textures += desc.count; - } - ASSERT(num_textures <= MAX_TEXTURES); - - u32 num_images = num_image_buffers; - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; - } - ASSERT(num_images <= MAX_IMAGES); -} - -void ComputeProgram::Configure() { - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); - buffer_cache.UnbindComputeStorageBuffers(); - size_t ssbo_index{}; - for (const auto& desc : info.storage_buffers_descriptors) { - 
ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, - desc.is_written); - ++ssbo_index; - } - texture_cache.SynchronizeComputeDescriptors(); - - std::array image_view_ids; - boost::container::static_vector image_view_indices; - std::array samplers; - std::array textures; - std::array images; - GLsizei sampler_binding{}; - GLsizei texture_binding{}; - GLsizei image_binding{}; - - const auto& qmd{kepler_compute.launch_description}; - const auto& cbufs{qmd.const_buffer_config}; - const bool via_header_index{qmd.linked_tsc != 0}; - const auto read_handle{[&](const auto& desc, u32 index) { - ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); - const u32 index_offset{index << desc.size_shift}; - const u32 offset{desc.cbuf_offset + index_offset}; - const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; - if constexpr (std::is_same_v || - std::is_same_v) { - if (desc.has_secondary) { - ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); - const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; - const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + - secondary_offset}; - const u32 lhs_raw{gpu_memory.Read(addr)}; - const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - return TexturePair(lhs_raw | rhs_raw, via_header_index); - } - } - return TexturePair(gpu_memory.Read(addr), via_header_index); - }}; - const auto add_image{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); - } - }}; - for (const auto& desc : info.texture_buffer_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); - samplers[sampler_binding++] = 0; - } - } - std::ranges::for_each(info.image_buffer_descriptors, add_image); - for (const 
auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices.push_back(handle.first); - - Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); - samplers[sampler_binding++] = sampler->Handle(); - } - } - std::ranges::for_each(info.image_descriptors, add_image); - - const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - texture_cache.FillComputeImageViews(indices_span, image_view_ids); - - if (assembly_program.handle != 0) { - program_manager.BindComputeAssemblyProgram(assembly_program.handle); - } else { - program_manager.BindProgram(source_program.handle); - } - buffer_cache.UnbindComputeTextureBuffers(); - size_t texbuf_index{}; - const auto add_buffer{[&](const auto& desc) { - constexpr bool is_image = std::is_same_v; - for (u32 i = 0; i < desc.count; ++i) { - bool is_written{false}; - if constexpr (is_image) { - is_written = desc.is_written; - } - ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; - buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format, - is_written, is_image); - ++texbuf_index; - } - }}; - std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); - std::ranges::for_each(info.image_buffer_descriptors, add_buffer); - - buffer_cache.UpdateComputeBuffers(); - - buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); - buffer_cache.BindHostComputeBuffers(); - - const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; - texture_binding += num_texture_buffers; - image_binding += num_image_buffers; - - for (const auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - textures[texture_binding++] = image_view.Handle(desc.type); - } - } - for 
(const auto& desc : info.image_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); - } - } - if (texture_binding != 0) { - ASSERT(texture_binding == sampler_binding); - glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); - } - if (image_binding != 0) { - glBindImageTextures(0, image_binding, images.data()); - } -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h deleted file mode 100644 index ddb00dc1d..000000000 --- a/src/video_core/renderer_opengl/gl_compute_program.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" -#include "shader_recompiler/shader_info.h" -#include "video_core/renderer_opengl/gl_buffer_cache.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" - -namespace Tegra { -class MemoryManager; -} - -namespace Tegra::Engines { -class KeplerCompute; -} - -namespace Shader { -struct Info; -} - -namespace OpenGL { - -class ProgramManager; - -struct ComputeProgramKey { - u64 unique_hash; - u32 shared_memory_size; - std::array workgroup_size; - - size_t Hash() const noexcept; - - bool operator==(const ComputeProgramKey&) const noexcept; - - bool operator!=(const ComputeProgramKey& rhs) const noexcept { - return !operator==(rhs); - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -class ComputeProgram { -public: - explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - 
Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::KeplerCompute& kepler_compute_, - ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_); - - void Configure(); - -private: - TextureCache& texture_cache; - BufferCache& buffer_cache; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::KeplerCompute& kepler_compute; - ProgramManager& program_manager; - - Shader::Info info; - OGLProgram source_program; - OGLAssemblyProgram assembly_program; - - u32 num_texture_buffers{}; - u32 num_image_buffers{}; -}; - -} // namespace OpenGL - -namespace std { -template <> -struct hash { - size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { - return k.Hash(); - } -}; -} // namespace std diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp new file mode 100644 index 000000000..32df35202 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -0,0 +1,402 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "common/cityhash.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/texture_cache/texture_cache.h" + +namespace OpenGL { +namespace { +using Shader::ImageBufferDescriptor; +using Shader::ImageDescriptor; +using Shader::TextureBufferDescriptor; +using Shader::TextureDescriptor; +using Tegra::Texture::TexturePair; +using VideoCommon::ImageId; + +constexpr u32 MAX_TEXTURES = 64; +constexpr u32 MAX_IMAGES = 8; + +template +u32 AccumulateCount(Range&& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + +/// Translates hardware transform feedback indices +/// @param location Hardware location +/// @return Pair of ARB_transform_feedback3 token stream first and third arguments +/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt +std::pair TransformFeedbackEnum(u8 location) { + const u8 index = location / 4; + if (index >= 8 && index <= 39) { + return {GL_GENERIC_ATTRIB_NV, index - 8}; + } + if (index >= 48 && index <= 55) { + return {GL_TEXTURE_COORD_NV, index - 48}; + } + switch (index) { + case 7: + return {GL_POSITION, 0}; + case 40: + return {GL_PRIMARY_COLOR_NV, 0}; + case 41: + return {GL_SECONDARY_COLOR_NV, 0}; + case 42: + return {GL_BACK_PRIMARY_COLOR_NV, 0}; + case 43: + return {GL_BACK_SECONDARY_COLOR_NV, 0}; + } + UNIMPLEMENTED_MSG("index={}", index); + return {GL_POSITION, 0}; +} +} // Anonymous namespace + +size_t GraphicsPipelineKey::Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); +} + +bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; +} + +GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, 
BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + std::array assembly_programs_, + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( + assembly_programs_)} { + std::ranges::transform(infos, stage_infos.begin(), + [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + + for (size_t stage = 0; stage < 5; ++stage) { + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + } + u32 num_textures{}; + u32 num_images{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + const auto& info{stage_infos[stage]}; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + } + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + if (assembly_programs[0].handle != 0 && xfb_state) { + 
GenerateTransformFeedbackState(*xfb_state); + } +} + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; + +void GraphicsPipeline::Configure(bool is_indexed) { + std::array image_view_ids; + std::array image_view_indices; + std::array samplers; + size_t image_view_index{}; + GLsizei sampler_binding{}; + + texture_cache.SynchronizeGraphicsDescriptors(); + + buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); + buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + + const auto& regs{maxwell3d.regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + const auto config_stage{[&](size_t stage) { + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + buffer_cache.UnbindGraphicsStorageBuffers(stage); + if constexpr (Spec::has_storage_buffers) { + size_t ssbo_index{}; + for (const auto& desc : info.storage_buffers_descriptors) { + ASSERT(desc.count == 1); + buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, + desc.cbuf_offset, desc.is_written); + ++ssbo_index; + } + } + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { + ASSERT(cbufs[desc.cbuf_index].enabled); + const u32 index_offset{index << desc.size_shift}; + const u32 offset{desc.cbuf_offset + index_offset}; + const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; + if constexpr (std::is_same_v || + std::is_same_v) { + if (desc.has_secondary) { + ASSERT(cbufs[desc.secondary_cbuf_index].enabled); + const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; + const GPUVAddr 
separate_addr{cbufs[desc.secondary_cbuf_index].address + + second_offset}; + const u32 lhs_raw{gpu_memory.Read(addr)}; + const u32 rhs_raw{gpu_memory.Read(separate_addr)}; + const u32 raw{lhs_raw | rhs_raw}; + return TexturePair(raw, via_header_index); + } + } + return TexturePair(gpu_memory.Read(addr), via_header_index); + }}; + const auto add_image{[&](const auto& desc) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + } + }}; + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + samplers[sampler_binding++] = 0; + } + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_image(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + const auto handle{read_handle(desc, index)}; + image_view_indices[image_view_index++] = handle.first; + + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; + samplers[sampler_binding++] = sampler->Handle(); + } + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + add_image(desc); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + config_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + config_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + config_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + config_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + config_stage(4); + } + const std::span indices_span(image_view_indices.data(), image_view_index); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); + + texture_cache.UpdateRenderTargets(false); + 
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); + + ImageId* texture_buffer_index{image_view_ids.data()}; + const auto bind_stage_info{[&](size_t stage) { + size_t index{}; + const auto add_buffer{[&](const auto& desc) { + constexpr bool is_image = std::is_same_v; + for (u32 i = 0; i < desc.count; ++i) { + bool is_written{false}; + if constexpr (is_image) { + is_written = desc.is_written; + } + ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; + buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), + image_view.BufferSize(), image_view.format, + is_written, is_image); + ++index; + ++texture_buffer_index; + } + }}; + const Shader::Info& info{stage_infos[stage]}; + buffer_cache.UnbindGraphicsTextureBuffers(stage); + + if constexpr (Spec::has_texture_buffers) { + for (const auto& desc : info.texture_buffer_descriptors) { + add_buffer(desc); + } + } + if constexpr (Spec::has_image_buffers) { + for (const auto& desc : info.image_buffer_descriptors) { + add_buffer(desc); + } + } + for (const auto& desc : info.texture_descriptors) { + texture_buffer_index += desc.count; + } + if constexpr (Spec::has_images) { + for (const auto& desc : info.image_descriptors) { + texture_buffer_index += desc.count; + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + bind_stage_info(0); + } + if constexpr (Spec::enabled_stages[1]) { + bind_stage_info(1); + } + if constexpr (Spec::enabled_stages[2]) { + bind_stage_info(2); + } + if constexpr (Spec::enabled_stages[3]) { + bind_stage_info(3); + } + if constexpr (Spec::enabled_stages[4]) { + bind_stage_info(4); + } + buffer_cache.UpdateGraphicsBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); + + if (assembly_programs[0].handle != 0) { + program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); + } else { + program_manager.BindProgram(program.handle); + } + const ImageId* views_it{image_view_ids.data()}; + GLsizei 
texture_binding = 0; + GLsizei image_binding = 0; + std::array textures; + std::array images; + const auto prepare_stage{[&](size_t stage) { + buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); + buffer_cache.BindHostStageBuffers(stage); + + texture_binding += num_texture_buffers[stage]; + image_binding += num_image_buffers[stage]; + + views_it += num_texture_buffers[stage]; + views_it += num_image_buffers[stage]; + + const auto& info{stage_infos[stage]}; + for (const auto& desc : info.texture_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + textures[texture_binding++] = image_view.Handle(desc.type); + } + } + for (const auto& desc : info.image_descriptors) { + for (u32 index = 0; index < desc.count; ++index) { + ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; + images[image_binding++] = image_view.Handle(desc.type); + } + } + }}; + if constexpr (Spec::enabled_stages[0]) { + prepare_stage(0); + } + if constexpr (Spec::enabled_stages[1]) { + prepare_stage(1); + } + if constexpr (Spec::enabled_stages[2]) { + prepare_stage(2); + } + if constexpr (Spec::enabled_stages[3]) { + prepare_stage(3); + } + if constexpr (Spec::enabled_stages[4]) { + prepare_stage(4); + } + if (texture_binding != 0) { + ASSERT(texture_binding == sampler_binding); + glBindTextures(0, texture_binding, textures.data()); + glBindSamplers(0, sampler_binding, samplers.data()); + } + if (image_binding != 0) { + glBindImageTextures(0, image_binding, images.data()); + } +} + +void GraphicsPipeline::GenerateTransformFeedbackState( + const VideoCommon::TransformFeedbackState& xfb_state) { + // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal + // when this is required. 
+ const auto& regs{maxwell3d.regs}; + + GLint* cursor{xfb_attribs.data()}; + GLint* current_stream{xfb_streams.data()}; + + for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { + const auto& layout = regs.tfb_layouts[feedback]; + UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); + if (layout.varying_count == 0) { + continue; + } + *current_stream = static_cast(feedback); + if (current_stream != xfb_streams.data()) { + // When stepping one stream, push the expected token + cursor[0] = GL_NEXT_BUFFER_NV; + cursor[1] = 0; + cursor[2] = 0; + cursor += XFB_ENTRY_STRIDE; + } + ++current_stream; + + const auto& locations = regs.tfb_varying_locs[feedback]; + std::optional current_index; + for (u32 offset = 0; offset < layout.varying_count; ++offset) { + const u8 location = locations[offset]; + const u8 index = location / 4; + + if (current_index == index) { + // Increase number of components of the previous attachment + ++cursor[-2]; + continue; + } + current_index = index; + + std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); + cursor[1] = 1; + cursor += XFB_ENTRY_STRIDE; + } + } + num_xfb_attribs = static_cast((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); + num_xfb_strides = static_cast(current_stream - xfb_streams.data()); +} + +void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h new file mode 100644 index 000000000..62f700cf5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -0,0 +1,118 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/shader_info.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_texture_cache.h" +#include "video_core/transform_feedback.h" + +namespace OpenGL { + +class ProgramManager; + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +struct GraphicsPipelineKey { + std::array unique_hashes; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; + BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + std::array padding; + VideoCommon::TransformFeedbackState xfb_state; + + size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineKey&) const noexcept; + + bool operator!=(const GraphicsPipelineKey& rhs) const noexcept { + return !operator==(rhs); + } + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsPipelineKey); + } else { + return offsetof(GraphicsPipelineKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); + +class GraphicsPipeline { +public: + explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, + OGLProgram program_, + std::array assembly_programs_, + const std::array& infos, + const VideoCommon::TransformFeedbackState* xfb_state); + + void 
Configure(bool is_indexed); + + void ConfigureTransformFeedback() const { + if (num_xfb_attribs != 0) { + ConfigureTransformFeedbackImpl(); + } + } + +private: + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + + void ConfigureTransformFeedbackImpl() const; + + TextureCache& texture_cache; + BufferCache& buffer_cache; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + ProgramManager& program_manager; + StateTracker& state_tracker; + + OGLProgram program; + std::array assembly_programs; + u32 enabled_stages_mask{}; + + std::array stage_infos{}; + std::array base_uniform_bindings{}; + std::array base_storage_bindings{}; + std::array num_texture_buffers{}; + std::array num_image_buffers{}; + + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; + GLsizei num_xfb_attribs{}; + GLsizei num_xfb_strides{}; + std::array xfb_attribs{}; + std::array xfb_streams{}; +}; + +} // namespace OpenGL + +namespace std { +template <> +struct hash { + size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept { + return k.Hash(); + } +}; +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp deleted file mode 100644 index 7c3d23f85..000000000 --- a/src/video_core/renderer_opengl/gl_graphics_program.cpp +++ /dev/null @@ -1,402 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include - -#include "common/cityhash.h" -#include "shader_recompiler/shader_info.h" -#include "video_core/renderer_opengl/gl_graphics_program.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" -#include "video_core/renderer_opengl/gl_state_tracker.h" -#include "video_core/texture_cache/texture_cache.h" - -namespace OpenGL { -namespace { -using Shader::ImageBufferDescriptor; -using Shader::ImageDescriptor; -using Shader::TextureBufferDescriptor; -using Shader::TextureDescriptor; -using Tegra::Texture::TexturePair; -using VideoCommon::ImageId; - -constexpr u32 MAX_TEXTURES = 64; -constexpr u32 MAX_IMAGES = 8; - -template -u32 AccumulateCount(Range&& range) { - u32 num{}; - for (const auto& desc : range) { - num += desc.count; - } - return num; -} - -/// Translates hardware transform feedback indices -/// @param location Hardware location -/// @return Pair of ARB_transform_feedback3 token stream first and third arguments -/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt -std::pair TransformFeedbackEnum(u8 location) { - const u8 index = location / 4; - if (index >= 8 && index <= 39) { - return {GL_GENERIC_ATTRIB_NV, index - 8}; - } - if (index >= 48 && index <= 55) { - return {GL_TEXTURE_COORD_NV, index - 48}; - } - switch (index) { - case 7: - return {GL_POSITION, 0}; - case 40: - return {GL_PRIMARY_COLOR_NV, 0}; - case 41: - return {GL_SECONDARY_COLOR_NV, 0}; - case 42: - return {GL_BACK_PRIMARY_COLOR_NV, 0}; - case 43: - return {GL_BACK_SECONDARY_COLOR_NV, 0}; - } - UNIMPLEMENTED_MSG("index={}", index); - return {GL_POSITION, 0}; -} -} // Anonymous namespace - -size_t GraphicsProgramKey::Hash() const noexcept { - return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); -} - -bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { - return std::memcmp(this, &rhs, Size()) == 0; -} - -GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, 
BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, - const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, - gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, - state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( - assembly_programs_)} { - std::ranges::transform(infos, stage_infos.begin(), - [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - - for (size_t stage = 0; stage < 5; ++stage) { - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; - } - u32 num_textures{}; - u32 num_images{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); - } - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; - - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; - - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); - - if (assembly_programs[0].handle != 0 && xfb_state) { - 
GenerateTransformFeedbackState(*xfb_state); - } -} - -struct Spec { - static constexpr std::array enabled_stages{true, true, true, true, true}; - static constexpr bool has_storage_buffers = true; - static constexpr bool has_texture_buffers = true; - static constexpr bool has_image_buffers = true; - static constexpr bool has_images = true; -}; - -void GraphicsProgram::Configure(bool is_indexed) { - std::array image_view_ids; - std::array image_view_indices; - std::array samplers; - size_t image_view_index{}; - GLsizei sampler_binding{}; - - texture_cache.SynchronizeGraphicsDescriptors(); - - buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); - buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); - - const auto& regs{maxwell3d.regs}; - const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - const auto config_stage{[&](size_t stage) { - const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); - buffer_cache.UnbindGraphicsStorageBuffers(stage); - if constexpr (Spec::has_storage_buffers) { - size_t ssbo_index{}; - for (const auto& desc : info.storage_buffers_descriptors) { - ASSERT(desc.count == 1); - buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, - desc.cbuf_offset, desc.is_written); - ++ssbo_index; - } - } - const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; - const auto read_handle{[&](const auto& desc, u32 index) { - ASSERT(cbufs[desc.cbuf_index].enabled); - const u32 index_offset{index << desc.size_shift}; - const u32 offset{desc.cbuf_offset + index_offset}; - const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; - if constexpr (std::is_same_v || - std::is_same_v) { - if (desc.has_secondary) { - ASSERT(cbufs[desc.secondary_cbuf_index].enabled); - const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; - const GPUVAddr 
separate_addr{cbufs[desc.secondary_cbuf_index].address + - second_offset}; - const u32 lhs_raw{gpu_memory.Read(addr)}; - const u32 rhs_raw{gpu_memory.Read(separate_addr)}; - const u32 raw{lhs_raw | rhs_raw}; - return TexturePair(raw, via_header_index); - } - } - return TexturePair(gpu_memory.Read(addr), via_header_index); - }}; - const auto add_image{[&](const auto& desc) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; - } - }}; - if constexpr (Spec::has_texture_buffers) { - for (const auto& desc : info.texture_buffer_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; - samplers[sampler_binding++] = 0; - } - } - } - if constexpr (Spec::has_image_buffers) { - for (const auto& desc : info.image_buffer_descriptors) { - add_image(desc); - } - } - for (const auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - const auto handle{read_handle(desc, index)}; - image_view_indices[image_view_index++] = handle.first; - - Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; - samplers[sampler_binding++] = sampler->Handle(); - } - } - if constexpr (Spec::has_images) { - for (const auto& desc : info.image_descriptors) { - add_image(desc); - } - } - }}; - if constexpr (Spec::enabled_stages[0]) { - config_stage(0); - } - if constexpr (Spec::enabled_stages[1]) { - config_stage(1); - } - if constexpr (Spec::enabled_stages[2]) { - config_stage(2); - } - if constexpr (Spec::enabled_stages[3]) { - config_stage(3); - } - if constexpr (Spec::enabled_stages[4]) { - config_stage(4); - } - const std::span indices_span(image_view_indices.data(), image_view_index); - texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - - texture_cache.UpdateRenderTargets(false); - 
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); - - ImageId* texture_buffer_index{image_view_ids.data()}; - const auto bind_stage_info{[&](size_t stage) { - size_t index{}; - const auto add_buffer{[&](const auto& desc) { - constexpr bool is_image = std::is_same_v; - for (u32 i = 0; i < desc.count; ++i) { - bool is_written{false}; - if constexpr (is_image) { - is_written = desc.is_written; - } - ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; - buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), - image_view.BufferSize(), image_view.format, - is_written, is_image); - ++index; - ++texture_buffer_index; - } - }}; - const Shader::Info& info{stage_infos[stage]}; - buffer_cache.UnbindGraphicsTextureBuffers(stage); - - if constexpr (Spec::has_texture_buffers) { - for (const auto& desc : info.texture_buffer_descriptors) { - add_buffer(desc); - } - } - if constexpr (Spec::has_image_buffers) { - for (const auto& desc : info.image_buffer_descriptors) { - add_buffer(desc); - } - } - for (const auto& desc : info.texture_descriptors) { - texture_buffer_index += desc.count; - } - if constexpr (Spec::has_images) { - for (const auto& desc : info.image_descriptors) { - texture_buffer_index += desc.count; - } - } - }}; - if constexpr (Spec::enabled_stages[0]) { - bind_stage_info(0); - } - if constexpr (Spec::enabled_stages[1]) { - bind_stage_info(1); - } - if constexpr (Spec::enabled_stages[2]) { - bind_stage_info(2); - } - if constexpr (Spec::enabled_stages[3]) { - bind_stage_info(3); - } - if constexpr (Spec::enabled_stages[4]) { - bind_stage_info(4); - } - buffer_cache.UpdateGraphicsBuffers(is_indexed); - buffer_cache.BindHostGeometryBuffers(is_indexed); - - if (assembly_programs[0].handle != 0) { - program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); - } else { - program_manager.BindProgram(program.handle); - } - const ImageId* views_it{image_view_ids.data()}; - GLsizei 
texture_binding = 0; - GLsizei image_binding = 0; - std::array textures; - std::array images; - const auto prepare_stage{[&](size_t stage) { - buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); - buffer_cache.BindHostStageBuffers(stage); - - texture_binding += num_texture_buffers[stage]; - image_binding += num_image_buffers[stage]; - - views_it += num_texture_buffers[stage]; - views_it += num_image_buffers[stage]; - - const auto& info{stage_infos[stage]}; - for (const auto& desc : info.texture_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - textures[texture_binding++] = image_view.Handle(desc.type); - } - } - for (const auto& desc : info.image_descriptors) { - for (u32 index = 0; index < desc.count; ++index) { - ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); - } - } - }}; - if constexpr (Spec::enabled_stages[0]) { - prepare_stage(0); - } - if constexpr (Spec::enabled_stages[1]) { - prepare_stage(1); - } - if constexpr (Spec::enabled_stages[2]) { - prepare_stage(2); - } - if constexpr (Spec::enabled_stages[3]) { - prepare_stage(3); - } - if constexpr (Spec::enabled_stages[4]) { - prepare_stage(4); - } - if (texture_binding != 0) { - ASSERT(texture_binding == sampler_binding); - glBindTextures(0, texture_binding, textures.data()); - glBindSamplers(0, sampler_binding, samplers.data()); - } - if (image_binding != 0) { - glBindImageTextures(0, image_binding, images.data()); - } -} - -void GraphicsProgram::GenerateTransformFeedbackState( - const VideoCommon::TransformFeedbackState& xfb_state) { - // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal - // when this is required. 
- const auto& regs{maxwell3d.regs}; - - GLint* cursor{xfb_attribs.data()}; - GLint* current_stream{xfb_streams.data()}; - - for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; - UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); - if (layout.varying_count == 0) { - continue; - } - *current_stream = static_cast(feedback); - if (current_stream != xfb_streams.data()) { - // When stepping one stream, push the expected token - cursor[0] = GL_NEXT_BUFFER_NV; - cursor[1] = 0; - cursor[2] = 0; - cursor += XFB_ENTRY_STRIDE; - } - ++current_stream; - - const auto& locations = regs.tfb_varying_locs[feedback]; - std::optional current_index; - for (u32 offset = 0; offset < layout.varying_count; ++offset) { - const u8 location = locations[offset]; - const u8 index = location / 4; - - if (current_index == index) { - // Increase number of components of the previous attachment - ++cursor[-2]; - continue; - } - current_index = index; - - std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); - cursor[1] = 1; - cursor += XFB_ENTRY_STRIDE; - } - } - num_xfb_attribs = static_cast((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE); - num_xfb_strides = static_cast(current_stream - xfb_streams.data()); -} - -void GraphicsProgram::ConfigureTransformFeedbackImpl() const { - glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, - xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h deleted file mode 100644 index 53a57ede5..000000000 --- a/src/video_core/renderer_opengl/gl_graphics_program.h +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include - -#include "common/bit_field.h" -#include "common/common_types.h" -#include "shader_recompiler/shader_info.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_buffer_cache.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/transform_feedback.h" - -namespace OpenGL { - -class ProgramManager; - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct GraphicsProgramKey { - std::array unique_hashes; - union { - u32 raw; - BitField<0, 1, u32> xfb_enabled; - BitField<1, 1, u32> early_z; - BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; - BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; - BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; - BitField<10, 1, u32> tessellation_clockwise; - }; - std::array padding; - VideoCommon::TransformFeedbackState xfb_state; - - size_t Hash() const noexcept; - - bool operator==(const GraphicsProgramKey&) const noexcept; - - bool operator!=(const GraphicsProgramKey& rhs) const noexcept { - return !operator==(rhs); - } - - [[nodiscard]] size_t Size() const noexcept { - if (xfb_enabled != 0) { - return sizeof(GraphicsProgramKey); - } else { - return offsetof(GraphicsProgramKey, padding); - } - } -}; -static_assert(std::has_unique_object_representations_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_constructible_v); - -class GraphicsProgram { -public: - explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, - const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state); - - void Configure(bool 
is_indexed); - - void ConfigureTransformFeedback() const { - if (num_xfb_attribs != 0) { - ConfigureTransformFeedbackImpl(); - } - } - -private: - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); - - void ConfigureTransformFeedbackImpl() const; - - TextureCache& texture_cache; - BufferCache& buffer_cache; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::Maxwell3D& maxwell3d; - ProgramManager& program_manager; - StateTracker& state_tracker; - - OGLProgram program; - std::array assembly_programs; - u32 enabled_stages_mask{}; - - std::array stage_infos{}; - std::array base_uniform_bindings{}; - std::array base_storage_bindings{}; - std::array num_texture_buffers{}; - std::array num_image_buffers{}; - - static constexpr std::size_t XFB_ENTRY_STRIDE = 3; - GLsizei num_xfb_attribs{}; - GLsizei num_xfb_strides{}; - std::array xfb_attribs{}; - std::array xfb_streams{}; -}; - -} // namespace OpenGL - -namespace std { -template <> -struct hash { - size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept { - return k.Hash(); - } -}; -} // namespace std diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 51ff42ee9..72a6dfd2a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -218,13 +218,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); - GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; + GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - program->Configure(is_indexed); + pipeline->Configure(is_indexed); const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); - BeginTransformFeedback(program, primitive_mode); + BeginTransformFeedback(pipeline, primitive_mode); const GLuint base_instance = 
static_cast(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = @@ -271,7 +271,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { } void RasterizerOpenGL::DispatchCompute() { - ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; + ComputePipeline* const program{shader_cache.CurrentComputePipeline()}; if (!program) { return; } @@ -996,7 +996,7 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } -void RasterizerOpenGL::BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode) { +void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) { const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 08f509c19..afd43b2ee 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -194,7 +194,7 @@ private: void SyncVertexInstances(); /// Begin a transform feedback - void BeginTransformFeedback(GraphicsProgram* program, GLenum primitive_mode); + void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode); /// End a transform feedback void EndTransformFeedback(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index ceec83a8a..33757938a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -152,7 +152,7 @@ GLenum AssemblyStage(size_t stage_index) { return GL_NONE; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsProgramKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program) { UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); @@ -282,7 +282,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& 
rasterizer_, Core::Frontend::EmuWindo ShaderCache::~ShaderCache() = default; -GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { +GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { return nullptr; } @@ -302,18 +302,18 @@ GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& program{pair->second}; if (is_new) { - program = CreateGraphicsProgram(); + program = CreateGraphicsPipeline(); } return program.get(); } -ComputeProgram* ShaderCache::CurrentComputeProgram() { +ComputePipeline* ShaderCache::CurrentComputePipeline() { const VideoCommon::ShaderInfo* const shader{ComputeShader()}; if (!shader) { return nullptr; } const auto& qmd{kepler_compute.launch_description}; - const ComputeProgramKey key{ + const ComputePipelineKey key{ .unique_hash = shader->unique_hash, .shared_memory_size = qmd.shared_alloc, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, @@ -323,20 +323,20 @@ ComputeProgram* ShaderCache::CurrentComputeProgram() { if (!is_new) { return pipeline.get(); } - pipeline = CreateComputeProgram(key, shader); + pipeline = CreateComputePipeline(key, shader); return pipeline.get(); } -std::unique_ptr ShaderCache::CreateGraphicsProgram() { +std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GraphicsEnvironments environments; GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); + return CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true); } -std::unique_ptr ShaderCache::CreateGraphicsProgram( - ShaderPools& pools, const GraphicsProgramKey& key, std::span envs, +std::unique_ptr ShaderCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", 
key.Hash()); size_t env_index{0}; @@ -382,27 +382,27 @@ std::unique_ptr ShaderCache::CreateGraphicsProgram( if (!device.UseAssemblyShaders()) { LinkProgram(source_program.handle); } - return std::make_unique( + return std::make_unique( texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } -std::unique_ptr ShaderCache::CreateComputeProgram( - const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { +std::unique_ptr ShaderCache::CreateComputePipeline( + const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - return CreateComputeProgram(main_pools, key, env, true); + return CreateComputePipeline(main_pools, key, env, true); } -std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& pools, - const ComputeProgramKey& key, - Shader::Environment& env, - bool build_in_parallel) { +std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, + const ComputePipelineKey& key, + Shader::Environment& env, + bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -418,9 +418,9 @@ std::unique_ptr ShaderCache::CreateComputeProgram(ShaderPools& p AddShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } - return std::make_unique(texture_cache, buffer_cache, gpu_memory, kepler_compute, - program_manager, program.info, - std::move(source_program), std::move(asm_program)); + return std::make_unique(texture_cache, buffer_cache, gpu_memory, + kepler_compute, program_manager, program.info, + 
std::move(source_program), std::move(asm_program)); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b49cd0ac7..a56559ea9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -15,8 +15,8 @@ #include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_compute_program.h" -#include "video_core/renderer_opengl/gl_graphics_program.h" +#include "video_core/renderer_opengl/gl_compute_pipeline.h" +#include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -55,24 +55,24 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); - [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); - [[nodiscard]] ComputeProgram* CurrentComputeProgram(); + [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: - std::unique_ptr CreateGraphicsProgram(); + std::unique_ptr CreateGraphicsPipeline(); - std::unique_ptr CreateGraphicsProgram( - ShaderPools& pools, const GraphicsProgramKey& key, + std::unique_ptr CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel); - std::unique_ptr CreateComputeProgram(const ComputeProgramKey& key, - const VideoCommon::ShaderInfo* shader); + std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, + const VideoCommon::ShaderInfo* shader); - std::unique_ptr CreateComputeProgram(ShaderPools& pools, - const ComputeProgramKey& key, - Shader::Environment& env, - bool build_in_parallel); + std::unique_ptr CreateComputePipeline(ShaderPools& pools, + const ComputePipelineKey& key, + Shader::Environment& env, + bool build_in_parallel); 
Core::Frontend::EmuWindow& emu_window; const Device& device; @@ -81,11 +81,11 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; - GraphicsProgramKey graphics_key{}; + GraphicsPipelineKey graphics_key{}; ShaderPools main_pools; - std::unordered_map> graphics_cache; - std::unordered_map> compute_cache; + std::unordered_map> graphics_cache; + std::unordered_map> compute_cache; Shader::Profile profile; }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 5ec57d707..88b734bcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -9,8 +9,8 @@ #include -#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { -- cgit v1.2.3 From a41b2ed3917f9ca5af30773e4671f4829380dceb Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 25 May 2021 20:39:55 -0300 Subject: gl_shader_cache: Add disk shader cache --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 113 +++++++++++++++++++-- src/video_core/renderer_opengl/gl_shader_cache.h | 10 +- 3 files changed, 116 insertions(+), 11 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 72a6dfd2a..eec01e8c2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -140,7 +140,9 @@ void RasterizerOpenGL::SyncVertexInstances() { } void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) {} + const VideoCore::DiskResourceLoadCallback& callback) { + shader_cache.LoadDiskResources(title_id, stop_loading, callback); +} void 
RasterizerOpenGL::Clear() { MICROPROFILE_SCOPE(OpenGL_Clears); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 33757938a..3aa5ac31d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -3,17 +3,19 @@ // Refer to the license.txt file included. #include +#include #include #include -#include #include #include -#include #include "common/alignment.h" #include "common/assert.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/thread_worker.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" @@ -40,6 +42,8 @@ using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; template @@ -154,8 +158,6 @@ GLenum AssemblyStage(size_t stage_index) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program) { - UNIMPLEMENTED_IF_MSG(key.xfb_enabled != 0, "Transform feedbacks"); - Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: @@ -282,6 +284,89 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo ShaderCache::~ShaderCache() = default; +void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; + } + auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + auto base_dir{shader_dir / "new_opengl"}; + auto transferable_dir{base_dir / "transferable"}; + auto precompiled_dir{base_dir / "precompiled"}; + if 
(!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; + } + shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + + struct Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; + }; + Common::StatefulThreadWorker workers( + std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); + + struct { + std::mutex mutex; + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + workers.QueueWork( + [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { + ctx->pools.ReleaseContents(); + auto pipeline{CreateComputePipeline(ctx->pools, key, env, false)}; + + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector envs) { + GraphicsPipelineKey key; + file.read(reinterpret_cast(&key), sizeof(key)); + workers.QueueWork( + [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { + boost::container::static_vector env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + ctx->pools.ReleaseContents(); + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; + + std::lock_guard lock{state.mutex}; + 
graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, shader_cache_filename, load_compute, load_graphics); + + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); + + workers.WaitForRequests(); +} + GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { return nullptr; @@ -332,7 +417,18 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - return CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true); + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; + if (shader_cache_filename.empty()) { + return pipeline; + } + boost::container::static_vector env_ptrs; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (graphics_key.unique_hashes[index] != 0) { + env_ptrs.push_back(&environments.envs[index]); + } + } + VideoCommon::SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + return pipeline; } std::unique_ptr ShaderCache::CreateGraphicsPipeline( @@ -396,7 +492,12 @@ std::unique_ptr ShaderCache::CreateComputePipeline( env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - return CreateComputePipeline(main_pools, key, env, true); + auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; + if (!shader_cache_filename.empty()) { + VideoCommon::SerializePipeline(key, std::array{&env}, + shader_cache_filename); + } + return pipeline; } std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h 
index a56559ea9..16175318b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,6 +5,8 @@ #pragma once #include +#include +#include #include #include @@ -23,10 +25,6 @@ namespace Tegra { class MemoryManager; } -namespace Core::Frontend { -class EmuWindow; -} - namespace OpenGL { class Device; @@ -55,6 +53,9 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_); ~ShaderCache(); + void LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback); + [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); [[nodiscard]] ComputePipeline* CurrentComputePipeline(); @@ -88,6 +89,7 @@ private: std::unordered_map> compute_cache; Shader::Profile profile; + std::filesystem::path shader_cache_filename; }; } // namespace OpenGL -- cgit v1.2.3 From adb591a757ccb289634920d51cf519b515ca32b6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 May 2021 18:32:59 -0300 Subject: glasm: Use storage buffers instead of global memory when possible --- src/shader_recompiler/CMakeLists.txt | 1 - .../backend/glasm/emit_context.cpp | 13 +- src/shader_recompiler/backend/glasm/emit_glasm.h | 5 +- .../backend/glasm/emit_glasm_atomic.cpp | 351 ------------------- .../backend/glasm/emit_glasm_memory.cpp | 380 ++++++++++++++++++++- src/shader_recompiler/profile.h | 3 + src/video_core/renderer_opengl/gl_buffer_cache.cpp | 26 +- src/video_core/renderer_opengl/gl_buffer_cache.h | 6 + .../renderer_opengl/gl_compute_pipeline.cpp | 42 ++- .../renderer_opengl/gl_compute_pipeline.h | 12 +- src/video_core/renderer_opengl/gl_device.cpp | 18 +- src/video_core/renderer_opengl/gl_device.h | 6 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 19 +- .../renderer_opengl/gl_graphics_pipeline.h | 12 +- src/video_core/renderer_opengl/gl_rasterizer.cpp | 13 +- src/video_core/renderer_opengl/gl_rasterizer.h | 3 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 30 
+- 17 files changed, 503 insertions(+), 437 deletions(-) delete mode 100644 src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index becdb7d54..d6d8e5f59 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -4,7 +4,6 @@ add_library(shader_recompiler STATIC backend/glasm/emit_context.h backend/glasm/emit_glasm.cpp backend/glasm/emit_glasm.h - backend/glasm/emit_glasm_atomic.cpp backend/glasm/emit_glasm_barriers.cpp backend/glasm/emit_glasm_bitwise_conversion.cpp backend/glasm/emit_glasm_composite.cpp diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index b5b0e2204..e18526816 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLASM { namespace { @@ -40,13 +41,21 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); ++cbuf_index; } + u32 ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { if (desc.count != 1) { throw NotImplementedException("Storage buffer descriptor array"); } + if (runtime_info.glasm_use_storage_buffers) { + Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer); + ++bindings.storage_buffer; + ++ssbo_index; + } } - if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { - Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + if (!runtime_info.glasm_use_storage_buffers) { + if (const size_t num = 
info.storage_buffers_descriptors.size(); num > 0) { + Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); + } } stage = program.stage; switch (program.stage) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index 3d02d873e..3df32a4a6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -15,9 +15,10 @@ namespace Shader::Backend::GLASM { [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, Bindings& bindings); -[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, IR::Program& program) { +[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, + IR::Program& program) { Bindings binding; - return EmitGLASM(profile, {}, program, binding); + return EmitGLASM(profile, runtime_info, program, binding); } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp deleted file mode 100644 index e72b252a3..000000000 --- a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp +++ /dev/null @@ -1,351 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "shader_recompiler/backend/glasm/emit_context.h" -#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" -#include "shader_recompiler/frontend/ir/value.h" - -namespace Shader::Backend::GLASM { -namespace { -void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, - std::string_view then_expr, std::string_view else_expr = {}) { - // Operate on bindless SSBO, call the expression with bounds checking - // address = c[binding].xy - // length = c[binding].z - const u32 sb_binding{binding.U32()}; - ctx.Add("PK64.U DC,c[{}];" // pointer = address - "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset) - "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset - "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length - sb_binding, offset, offset, sb_binding); - if (else_expr.empty()) { - ctx.Add("IF NE.x;{}ENDIF;", then_expr); - } else { - ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr); - } -} - -template -void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, - ValueType value, std::string_view operation, std::string_view size) { - const Register ret{ctx.reg_alloc.Define(inst)}; - StorageOp(ctx, binding, offset, - fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); -} -} // namespace - -void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarS32 value) { - ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarS32 value) { - 
ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - ScalarU32 value) { - ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, - Register value) { - ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); -} - -void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "ADD", "U32"); -} - -void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarS32 value) { - Atom(ctx, inst, binding, 
offset, value, "MIN", "S32"); -} - -void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "MIN", "U32"); -} - -void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarS32 value) { - Atom(ctx, inst, binding, offset, value, "MAX", "S32"); -} - -void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "MAX", "U32"); -} - -void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); -} - -void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); -} - -void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "AND", "U32"); -} - -void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "OR", "U32"); -} - -void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "XOR", "U32"); -} - -void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarU32 value) { - Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); -} - -void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "ADD", "U64"); -} - -void EmitStorageAtomicSMin64(EmitContext& ctx, 
IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MIN", "S64"); -} - -void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MIN", "U64"); -} - -void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MAX", "S64"); -} - -void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MAX", "U64"); -} - -void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "AND", "U64"); -} - -void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "OR", "U64"); -} - -void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "XOR", "U64"); -} - -void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); -} - -void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, ScalarF32 value) { - Atom(ctx, inst, binding, offset, value, "ADD", "F32"); -} - -void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); -} - -void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& 
binding, - [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); -} - -void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, - ScalarU32 offset, Register value) { - Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); -} - -void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicIAdd32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMin32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicUMin32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMax32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicUMax32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicInc32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicDec32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAnd32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicOr32(EmitContext&) { - throw NotImplementedException("GLASM 
instruction"); -} - -void EmitGlobalAtomicXor32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicExchange32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicIAdd64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMin64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicUMin64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicSMax64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicUMax64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicInc64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicDec64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAnd64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicOr64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicXor64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicExchange64(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAddF32(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAddF16x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicAddF32x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicMinF16x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicMinF32x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} - -void EmitGlobalAtomicMaxF16x2(EmitContext&) { - throw NotImplementedException("GLASM 
instruction"); -} - -void EmitGlobalAtomicMaxF32x2(EmitContext&) { - throw NotImplementedException("GLASM instruction"); -} -} // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index 26b03587e..90dbb80d2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLASM { namespace { @@ -29,7 +30,7 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, } } -void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_expr, +void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr, std::string_view else_expr = {}) { const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()}; for (size_t index = 0; index < num_buffers; ++index) { @@ -44,14 +45,22 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 1 "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? 
-1 : 1 "AND.U.CC RC.x,RC.x,RC.y;" - "IF NE.x;" // a && b - "SUB.U64 DC.x,{}.x,DC.x;" // offset = input_addr - ssbo_addr - "PK64.U DC.y,c[{}];" // host_ssbo = cbuf - "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset - "{}" - "ELSE;", + "IF NE.x;" // a && b + "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, - address, address, index, then_expr); + address, address); + if (pointer_based) { + ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf + "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset + "{}" + "ELSE;", + index, expr); + } else { + ctx.Add("CVT.U32.U64 RC.x,DC.x;" + "{},ssbo{}[RC.x];" + "ELSE;", + expr, index); + } } if (!else_expr.empty()) { ctx.Add("{}", else_expr); @@ -64,25 +73,54 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e template void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, std::string_view size) { - StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); + } } void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, std::string_view size) { const Register ret{ctx.reg_alloc.Define(inst)}; - StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), - fmt::format("MOV.U {},{{0,0,0,0}};", ret)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset); + } else { + StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.U {},{{0,0,0,0}};", ret)); + } } template void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) { - GlobalStorageOp(ctx, 
address, fmt::format("STORE.{} {},DC.x;", size, value)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value)); + } } void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) { const Register ret{ctx.reg_alloc.Define(inst)}; - GlobalStorageOp(ctx, address, fmt::format("LOAD.{} {},DC.x;", size, ret), - fmt::format("MOV.S {},0;", ret)); + if (ctx.runtime_info.glasm_use_storage_buffers) { + GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret)); + } else { + GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret), + fmt::format("MOV.S {},0;", ret)); + } +} + +template +void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, + ValueType value, std::string_view operation, std::string_view size) { + const Register ret{ctx.reg_alloc.Define(inst)}; + if (ctx.runtime_info.glasm_use_storage_buffers) { + ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(), + offset); + } else { + StorageOp(ctx, binding, offset, + fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); + } } } // Anonymous namespace @@ -212,4 +250,318 @@ void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 o Write(ctx, binding, offset, value, "U32X4"); } +void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value) { + ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + 
ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarS32 value) { + ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + ScalarU32 value) { + ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value) { + ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, 
offset, value, "ADD", "U32"); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S32"); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U32"); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarS32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S32"); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U32"); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "AND", "U32"); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "OR", "U32"); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U32"); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarU32 value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, 
IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "ADD", "U64"); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "S64"); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "U64"); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "S64"); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "U64"); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "AND", "U64"); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "OR", "U64"); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "XOR", "U64"); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, ScalarF32 value) { + Atom(ctx, inst, binding, offset, value, "ADD", "F32"); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, 
inst, binding, offset, value, "ADD", "F16x2"); +} + +void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); +} + +void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value) { + Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); +} + +void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void 
EmitGlobalAtomicAnd32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicIAdd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicXor64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange64(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAddF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMinF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void 
EmitGlobalAtomicMinF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c46452c3d..f8913bf14 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -111,7 +111,10 @@ struct RuntimeInfo { std::optional alpha_test_func; float alpha_test_reference{}; + // Static y negate value bool y_negate{}; + // Use storage buffers instead of global pointers on GLASM + bool glasm_use_storage_buffers{}; std::vector xfb_varyings; }; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2d0ef1307..334ed470f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -195,7 +195,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + const GLuint base_binding = graphics_base_storage_bindings[stage]; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), + static_cast(offset), static_cast(size)); + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast(size), @@ -204,17 +209,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff buffer.MakeResident(is_written ? 
GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast(&ssbo)); - } else { - const GLuint base_binding = graphics_base_storage_bindings[stage]; - const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), - static_cast(offset), static_cast(size)); } } void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + if (size != 0) { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), + static_cast(offset), static_cast(size)); + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); + } + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast(size), @@ -223,11 +230,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf buffer.MakeResident(is_written ? 
GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, reinterpret_cast(&ssbo)); - } else if (size == 0) { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); - } else { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), - static_cast(offset), static_cast(size)); } } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 4986c65fd..bc16abafb 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -147,6 +147,10 @@ public: image_handles = image_handles_; } + void SetEnableStorageBuffers(bool use_storage_buffers_) { + use_storage_buffers = use_storage_buffers_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -160,6 +164,8 @@ private: bool use_assembly_shaders = false; bool has_unified_vertex_buffers = false; + bool use_storage_buffers = false; + u32 max_attributes = 0; std::array graphics_base_uniform_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 700ebd8b8..5cf5f97a9 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -17,6 +17,15 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 16; +template +u32 AccumulateCount(const Range& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + size_t ComputePipelineKey::Hash() const noexcept { return static_cast( Common::CityHash64(reinterpret_cast(this), sizeof *this)); @@ -26,31 +35,31 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep return std::memcmp(this, &rhs, sizeof *this) == 0; } 
-ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, +ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, OGLProgram source_program_, OGLAssemblyProgram assembly_program_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { - for (const auto& desc : info.texture_buffer_descriptors) { - num_texture_buffers += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_image_buffers += desc.count; - } - u32 num_textures = num_texture_buffers; - for (const auto& desc : info.texture_descriptors) { - num_textures += desc.count; - } + + num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); + num_image_buffers = AccumulateCount(info.image_buffer_descriptors); + + const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; ASSERT(num_textures <= MAX_TEXTURES); - u32 num_images = num_image_buffers; - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; - } + const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; ASSERT(num_images <= MAX_IMAGES); + + const bool is_glasm{assembly_program.handle != 0}; + const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; + use_storage_buffers = + !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory = !use_storage_buffers && + std::ranges::any_of(info.storage_buffers_descriptors, + [](const auto& desc) { return desc.is_written; }); } void ComputePipeline::Configure() 
{ @@ -150,6 +159,7 @@ void ComputePipeline::Configure() { buffer_cache.UpdateComputeBuffers(); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); buffer_cache.BindHostComputeBuffers(); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index e3b94e2f3..dd6b62ef2 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -28,6 +28,7 @@ struct Info; namespace OpenGL { +class Device; class ProgramManager; struct ComputePipelineKey { @@ -49,14 +50,18 @@ static_assert(std::is_trivially_constructible_v); class ComputePipeline { public: - explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, + explicit ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, OGLProgram source_program_, OGLAssemblyProgram assembly_program_); void Configure(); + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + private: TextureCache& texture_cache; BufferCache& buffer_cache; @@ -70,6 +75,9 @@ private: u32 num_texture_buffers{}; u32 num_image_buffers{}; + + bool use_storage_buffers{}; + bool writes_global_memory{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 18bbc4c1f..01da2bb57 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -135,13 +135,13 @@ Device::Device() { "Beta driver 443.24 is known to have issues. 
There might be performance issues."); disable_fast_buffer_sub_data = true; } - max_uniform_buffers = BuildMaxUniformBuffers(); uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); max_compute_shared_memory_size = GetInteger(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); + max_glasm_storage_buffer_blocks = GetInteger(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; has_shader_ballot = GLAD_GL_ARB_shader_ballot; @@ -236,22 +236,6 @@ std::string Device::GetVendorName() const { return vendor_name; } -Device::Device(std::nullptr_t) { - max_uniform_buffers.fill(std::numeric_limits::max()); - uniform_buffer_alignment = 4; - shader_storage_alignment = 4; - max_vertex_attributes = 16; - max_varyings = 15; - max_compute_shared_memory_size = 0x10000; - has_warp_intrinsics = true; - has_shader_ballot = true; - has_vertex_viewport_layer = true; - has_image_load_formatted = true; - has_texture_shadow_lod = true; - has_variable_aoffi = true; - has_depth_buffer_float = true; -} - bool Device::TestVariableAoffi() { return TestProgram(R"(#version 430 core // This is a unit test, please ignore me on apitrace bug reports. 
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 152a3acd3..d67f5693c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -13,7 +13,6 @@ namespace OpenGL { class Device { public: explicit Device(); - explicit Device(std::nullptr_t); [[nodiscard]] std::string GetVendorName() const; @@ -41,6 +40,10 @@ public: return max_compute_shared_memory_size; } + u32 GetMaxGLASMStorageBufferBlocks() const { + return max_glasm_storage_buffer_blocks; + } + bool HasWarpIntrinsics() const { return has_warp_intrinsics; } @@ -124,6 +127,7 @@ private: u32 max_vertex_attributes{}; u32 max_varyings{}; u32 max_compute_shared_memory_size{}; + u32 max_glasm_storage_buffer_blocks{}; bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 32df35202..19d85c482 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -25,7 +25,7 @@ constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; template -u32 AccumulateCount(Range&& range) { +u32 AccumulateCount(const Range& range) { u32 num{}; for (const auto& desc : range) { num += desc.count; @@ -70,8 +70,8 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc return std::memcmp(this, &rhs, Size()) == 0; } -GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, +GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, @@ -90,6 +90,7 @@ 
GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu } u32 num_textures{}; u32 num_images{}; + u32 num_storage_buffers{}; for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { const auto& info{stage_infos[stage]}; if (stage < 4) { @@ -109,11 +110,20 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu num_textures += AccumulateCount(info.texture_descriptors); num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); - if (assembly_programs[0].handle != 0 && xfb_state) { + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + + if (assembly_shaders && xfb_state) { GenerateTransformFeedbackState(*xfb_state); } } @@ -137,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 62f700cf5..c1113e180 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -20,6 +20,7 @@ namespace OpenGL { +class Device; class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -60,8 +61,8 @@ 
static_assert(std::is_trivially_constructible_v); class GraphicsPipeline { public: - explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, + explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, @@ -77,6 +78,10 @@ public: } } + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + private: void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); @@ -99,6 +104,9 @@ private: std::array num_texture_buffers{}; std::array num_image_buffers{}; + bool use_storage_buffers{}; + bool writes_global_memory{}; + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; GLsizei num_xfb_strides{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index eec01e8c2..5d4e80364 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -268,19 +268,21 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { EndTransformFeedback(); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); gpu.TickWork(); } void RasterizerOpenGL::DispatchCompute() { - ComputePipeline* const program{shader_cache.CurrentComputePipeline()}; - if (!program) { + ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; + if (!pipeline) { return; } - program->Configure(); + pipeline->Configure(); const auto& qmd{kepler_compute.launch_description}; glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ 
-449,9 +451,8 @@ void RasterizerOpenGL::FlushCommands() { // Make sure memory stored from the previous GL command stream is visible // This is only needed on assembly shaders where we write to GPU memory with raw pointers - // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used - // and prefer using NV_shader_storage_buffer_object when possible - if (Settings::values.use_assembly_shaders.GetValue()) { + if (has_written_global_memory) { + has_written_global_memory = false; glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); } glFlush(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index afd43b2ee..d0397b745 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -225,7 +225,8 @@ private: std::array image_handles{}; /// Number of commands queued to the OpenGL driver. Resetted on flush. - std::size_t num_queued_commands = 0; + size_t num_queued_commands = 0; + bool has_written_global_memory = false; u32 last_clip_distance_mask = 0; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3aa5ac31d..287f497b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -157,7 +157,8 @@ GLenum AssemblyStage(size_t stage_index) { } Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, - const Shader::IR::Program& program) { + const Shader::IR::Program& program, + bool glasm_use_storage_buffers) { Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: @@ -220,6 +221,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, info.input_topology = Shader::InputTopology::TrianglesAdjacency; break; } + info.glasm_use_storage_buffers = glasm_use_storage_buffers; return info; } @@ -435,7 +437,8 @@ std::unique_ptr 
ShaderCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); - size_t env_index{0}; + size_t env_index{}; + u32 total_storage_buffers{}; std::array programs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { @@ -447,7 +450,14 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + + for (const auto& desc : programs[index].info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } } + const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; + const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; + std::array infos{}; OGLProgram source_program; @@ -466,7 +476,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)}; if (device.UseAssemblyShaders()) { const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); @@ -479,7 +489,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( LinkProgram(source_program.handle); } return std::make_unique( - texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); } @@ -508,10 +518,18 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + + u32 num_storage_buffers{}; + for (const auto& desc : program.info.storage_buffers_descriptors) { + num_storage_buffers += desc.count; + } + Shader::RuntimeInfo info; + info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + OGLAssemblyProgram asm_program; OGLProgram source_program; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program)}; + const std::string code{EmitGLASM(profile, info, program)}; asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); } else { const std::vector code{EmitSPIRV(profile, program)}; @@ -519,7 +537,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& AddShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } - return std::make_unique(texture_cache, buffer_cache, gpu_memory, + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, kepler_compute, program_manager, program.info, std::move(source_program), std::move(asm_program)); } -- cgit v1.2.3 From 3b595fe8b28001eed4a936e2a7b465bd67dcc4b7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 16:47:49 -0300 Subject: glasm: Prepare XFB from state instead of global registers --- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 19d85c482..38ec88b13 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -362,13 +362,11 @@ void 
GraphicsPipeline::GenerateTransformFeedbackState( const VideoCommon::TransformFeedbackState& xfb_state) { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. - const auto& regs{maxwell3d.regs}; - GLint* cursor{xfb_attribs.data()}; GLint* current_stream{xfb_streams.data()}; for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = regs.tfb_layouts[feedback]; + const auto& layout = xfb_state.layouts[feedback]; UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); if (layout.varying_count == 0) { continue; @@ -383,7 +381,7 @@ void GraphicsPipeline::GenerateTransformFeedbackState( } ++current_stream; - const auto& locations = regs.tfb_varying_locs[feedback]; + const auto& locations = xfb_state.varyings[feedback]; std::optional current_index; for (u32 offset = 0; offset < layout.varying_count; ++offset) { const u8 location = locations[offset]; -- cgit v1.2.3 From b7764c3a796e53ac74009bc7d7cd153c64b6d743 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 27 May 2021 17:51:00 -0300 Subject: shader: Handle host exceptions --- src/shader_recompiler/exception.h | 40 ++++++++++++++++---- src/shader_recompiler/frontend/maxwell/opcodes.cpp | 2 +- src/shader_recompiler/frontend/maxwell/program.cpp | 1 + .../frontend/maxwell/translate/translate.cpp | 13 +++++-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 4 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 43 +++++++++++++--------- src/video_core/renderer_opengl/gl_shader_cache.h | 5 +-- .../renderer_vulkan/vk_pipeline_cache.cpp | 35 ++++++++++++------ 8 files changed, 98 insertions(+), 45 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h index 6fe620801..013d7b1bf 100644 --- a/src/shader_recompiler/exception.h +++ b/src/shader_recompiler/exception.h @@ -5,38 +5,62 @@ 
#pragma once #include +#include +#include #include #include namespace Shader { -class LogicError : public std::logic_error { +class Exception : public std::exception { +public: + explicit Exception(std::string message_) noexcept : message{std::move(message_)} {} + + const char* what() const override { + return message.c_str(); + } + + void Prepend(std::string_view prepend) { + message.insert(0, prepend); + } + + void Append(std::string_view append) { + message += append; + } + +private: + std::string message; +}; + +class LogicError : public Exception { public: template LogicError(const char* message, Args&&... args) - : std::logic_error{fmt::format(message, std::forward(args)...)} {} + : Exception{fmt::format(message, std::forward(args)...)} {} }; -class RuntimeError : public std::runtime_error { +class RuntimeError : public Exception { public: template RuntimeError(const char* message, Args&&... args) - : std::runtime_error{fmt::format(message, std::forward(args)...)} {} + : Exception{fmt::format(message, std::forward(args)...)} {} }; -class NotImplementedException : public std::logic_error { +class NotImplementedException : public Exception { public: template NotImplementedException(const char* message, Args&&... args) - : std::logic_error{fmt::format(message, std::forward(args)...)} {} + : Exception{fmt::format(message, std::forward(args)...)} { + Append(" is not implemented"); + } }; -class InvalidArgument : public std::invalid_argument { +class InvalidArgument : public Exception { public: template InvalidArgument(const char* message, Args&&... 
args) - : std::invalid_argument{fmt::format(message, std::forward(args)...)} {} + : Exception{fmt::format(message, std::forward(args)...)} {} }; } // namespace Shader diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp index 12ddf2ac9..ccc40c20c 100644 --- a/src/shader_recompiler/frontend/maxwell/opcodes.cpp +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp @@ -10,7 +10,7 @@ namespace Shader::Maxwell { namespace { constexpr std::array NAME_TABLE{ -#define INST(name, cute, encode) #cute, +#define INST(name, cute, encode) cute, #include "maxwell.inc" #undef INST }; diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index ccdab1dad..900fc7ab1 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -7,6 +7,7 @@ #include #include +#include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/post_order.h" #include "shader_recompiler/frontend/maxwell/program.h" diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp index 0f4e7a251..8e3c4c5d5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp @@ -30,16 +30,21 @@ void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 locat TranslatorVisitor visitor{env, *block}; for (Location pc = location_begin; pc != location_end; ++pc) { const u64 insn{env.ReadInstruction(pc.Offset())}; - const Opcode opcode{Decode(insn)}; - switch (opcode) { + try { + const Opcode opcode{Decode(insn)}; + switch (opcode) { #define INST(name, cute, mask) \ case Opcode::name: \ Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \ break; #include 
"shader_recompiler/frontend/maxwell/maxwell.inc" #undef OPCODE - default: - throw LogicError("Invalid opcode {}", opcode); + default: + throw LogicError("Invalid opcode {}", opcode); + } + } catch (Exception& exception) { + exception.Prepend(fmt::format("Translate {}: ", Decode(insn))); + throw; } } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 5d4e80364..54696d97d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -221,7 +221,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncState(); GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; - + if (!pipeline) { + return; + } std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; pipeline->Configure(is_indexed); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 287f497b5..7d2ec4efa 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -45,6 +45,7 @@ using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +using VideoCommon::SerializePipeline; template auto MakeSpan(Container& container) { @@ -327,10 +328,11 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, workers.QueueWork( [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { ctx->pools.ReleaseContents(); - auto pipeline{CreateComputePipeline(ctx->pools, key, env, false)}; - + auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, 
state.built, state.total); @@ -348,10 +350,11 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; - + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs))}; std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + graphics_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -419,8 +422,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; - if (shader_cache_filename.empty()) { + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span())}; + if (!pipeline || shader_cache_filename.empty()) { return pipeline; } boost::container::static_vector env_ptrs; @@ -429,13 +432,13 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { env_ptrs.push_back(&environments.envs[index]); } } - VideoCommon::SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); return pipeline; } std::unique_ptr ShaderCache::CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, std::span envs, - bool build_in_parallel) { + ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{}; u32 total_storage_buffers{}; @@ -492,6 +495,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, 
key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; } std::unique_ptr ShaderCache::CreateComputePipeline( @@ -502,18 +509,17 @@ std::unique_ptr ShaderCache::CreateComputePipeline( env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; - if (!shader_cache_filename.empty()) { - VideoCommon::SerializePipeline(key, std::array{&env}, - shader_cache_filename); + auto pipeline{CreateComputePipeline(main_pools, key, env)}; + if (!pipeline || shader_cache_filename.empty()) { + return pipeline; } + SerializePipeline(key, std::array{&env}, shader_cache_filename); return pipeline; } std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env, - bool build_in_parallel) { + Shader::Environment& env) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -540,6 +546,9 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, kepler_compute, program_manager, program.info, std::move(source_program), std::move(asm_program)); +} catch (Shader::Exception& exception) { + LOG_ERROR(Render_OpenGL, "{}", exception.what()); + return nullptr; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 16175318b..cf74d34e4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -65,15 +65,14 @@ private: std::unique_ptr CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs, bool build_in_parallel); + std::span envs); std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, const 
VideoCommon::ShaderInfo* shader); std::unique_ptr CreateComputePipeline(ShaderPools& pools, const ComputePipelineKey& key, - Shader::Environment& env, - bool build_in_parallel); + Shader::Environment& env); Core::Frontend::EmuWindow& emu_window; const Device& device; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f86bf9c30..b6998e37c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -303,6 +303,9 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { if (is_new) { pipeline = CreateGraphicsPipeline(); } + if (!pipeline) { + return nullptr; + } if (current_pipeline) { current_pipeline->AddTransition(pipeline.get()); } @@ -362,9 +365,10 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { ShaderPools pools; auto pipeline{CreateComputePipeline(pools, key, env, false)}; - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); + if (pipeline) { + compute_cache.emplace(key, std::move(pipeline)); + } ++state.built; if (state.has_loaded) { callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); @@ -405,7 +409,7 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading std::unique_ptr PipelineCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineCacheKey& key, - std::span envs, bool build_in_parallel) { + std::span envs, bool build_in_parallel) try { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; @@ -458,6 +462,10 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( return std::make_unique( maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, std::move(modules), 
infos); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } std::unique_ptr PipelineCache::CreateGraphicsPipeline() { @@ -466,7 +474,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; - if (pipeline_cache_filename.empty()) { + if (!pipeline || pipeline_cache_filename.empty()) { return pipeline; } serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { @@ -477,7 +485,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env_ptrs.push_back(&envs[index]); } } - VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); + SerializePipeline(key, env_ptrs, pipeline_cache_filename); }); return pipeline; } @@ -491,18 +499,19 @@ std::unique_ptr PipelineCache::CreateComputePipeline( main_pools.ReleaseContents(); auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; - if (!pipeline_cache_filename.empty()) { - serialization_thread.QueueWork([this, key, env = std::move(env)] { - VideoCommon::SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); - }); + if (!pipeline || pipeline_cache_filename.empty()) { + return pipeline; } + serialization_thread.QueueWork([this, key, env = std::move(env)] { + SerializePipeline(key, std::array{&env}, + pipeline_cache_filename); + }); return pipeline; } std::unique_ptr PipelineCache::CreateComputePipeline( ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, - bool build_in_parallel) { + bool build_in_parallel) try { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -517,6 +526,10 @@ std::unique_ptr PipelineCache::CreateComputePipeline( Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, thread_worker, program.info, std::move(spv_module)); + +} catch (const Shader::Exception& exception) { + LOG_ERROR(Render_Vulkan, "{}", exception.what()); + return nullptr; } } // namespace Vulkan -- cgit v1.2.3 From a7e9756671be5bb99566277709e5becdea774f34 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 02:57:42 -0300 Subject: buffer_cache: Mark uniform buffers as dirty if any enable bit changes --- src/video_core/buffer_cache/buffer_cache.h | 10 +++++----- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 4 +++- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 1 + src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 6 +++++- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 3 +++ 5 files changed, 17 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6c92e4c30..d6b9eb99f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -142,7 +142,7 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(size_t stage, u32 enabled); + void SetEnabledUniformBuffers(const std::array& mask); void SetEnabledComputeUniformBuffers(u32 enabled); @@ -670,13 +670,13 @@ void BufferCache

::BindHostComputeBuffers() { } template -void BufferCache

::SetEnabledUniformBuffers(size_t stage, u32 enabled) { +void BufferCache

::SetEnabledUniformBuffers(const std::array& mask) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers[stage] != enabled) { - dirty_uniform_buffers[stage] = ~u32{0}; + if (enabled_uniform_buffers != mask) { + dirty_uniform_buffers.fill(~u32{0}); } } - enabled_uniform_buffers[stage] = enabled; + enabled_uniform_buffers = mask; } template diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 38ec88b13..976897067 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -100,6 +100,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } + enabled_uniform_buffers[stage] = info.constant_buffer_mask; + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; num_texture_buffers[stage] += num_tex_buffer_bindings; num_textures += num_tex_buffer_bindings; @@ -145,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); + buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); @@ -153,7 +156,6 @@ void GraphicsPipeline::Configure(bool is_indexed) { const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; const auto config_stage{[&](size_t stage) { const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { size_t 
ssbo_index{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index c1113e180..bf33ce604 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -99,6 +99,7 @@ private: u32 enabled_stages_mask{}; std::array stage_infos{}; + std::array enabled_uniform_buffers{}; std::array base_uniform_bindings{}; std::array base_storage_bindings{}; std::array num_texture_buffers{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e5f54a84f..dfe6e6a80 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -218,6 +218,9 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); + std::ranges::transform(infos, enabled_uniform_buffers.begin(), [](const Shader::Info* info) { + return info ? 
info->constant_buffer_mask : 0; + }); auto func{[this, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; @@ -259,11 +262,12 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); + buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; - buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { size_t ssbo_index{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index e362d13c5..4068a0edc 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -128,7 +128,10 @@ private: std::vector transitions; std::array spv_modules; + std::array stage_infos; + std::array enabled_uniform_buffers{}; + vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; vk::PipelineLayout pipeline_layout; -- cgit v1.2.3 From 916ca7432474e891864524dcbc6c879d5cdbfb72 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 30 May 2021 03:40:19 -0300 Subject: opengl: Declare fragment outputs even if they are not used Fixes Ori and the Blind Forest's menu on GLASM. For some reason (probably high level optimizations) it is not sanitized on SPIR-V for OpenGL. Vulkan is unaffected by this change. 
--- src/shader_recompiler/backend/glasm/emit_context.cpp | 10 +++------- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 3 +-- src/shader_recompiler/backend/spirv/emit_context.cpp | 2 +- src/shader_recompiler/profile.h | 4 ++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 7 +++++++ 6 files changed, 18 insertions(+), 10 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index e18526816..08918a5c2 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -117,13 +117,9 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile index, index); } } - for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { - if (!info.stores_frag_color[index]) { - continue; - } - if (index == 0) { - Add("OUTPUT frag_color0=result.color;"); - } else { + if (stage == Stage::Fragment) { + Add("OUTPUT frag_color0=result.color;"); + for (size_t index = 1; index < info.stores_frag_color.size(); ++index) { Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index e23208d2c..70ca6f621 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -298,8 +298,7 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { header += "OPTION NV_early_fragment_tests;"; } - const auto non_zero_frag_colors{info.stores_frag_color | std::views::drop(1)}; - if (std::ranges::find(non_zero_frag_colors, true) != non_zero_frag_colors.end()) { + if (stage == Stage::Fragment) { header += "OPTION ARB_draw_buffers;"; } } diff --git 
a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3e8899f53..7c618125e 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1320,7 +1320,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { break; case Stage::Fragment: for (u32 index = 0; index < 8; ++index) { - if (!info.stores_frag_color[index]) { + if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { continue; } frag_color[index] = DefineOutput(*this, F32[4], std::nullopt); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f8913bf14..f059e3b26 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -84,7 +84,11 @@ struct Profile { bool support_int64_atomics{}; bool warp_size_potentially_larger_than_guest{}; + bool lower_left_origin_mode{}; + /// Fragment outputs have to be declared even if they are not written to avoid undefined values. + /// See Ori and the Blind Forest's main menu for reference. 
+ bool need_declared_frag_colors{}; /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7d2ec4efa..6ea7c0ee8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -276,7 +276,9 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_int64_atomics = false, .warp_size_potentially_larger_than_guest = true, + .lower_left_origin_mode = true, + .need_declared_frag_colors = true, .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b6998e37c..cec51cc77 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -274,9 +274,16 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), + + .lower_left_origin_mode = false, + .need_declared_frag_colors = false, + .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, + .has_broken_signed_operations = false, + .ignore_nan_fp_comparisons = false, }; } -- cgit v1.2.3 From 4a2361a1e2271727f3259e8e4a60869165537253 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 02:15:07 -0300 Subject: buffer_cache: Reduce uniform buffer size from shader usage Increases performance significantly on certain titles. 
--- .../ir_opt/collect_shader_info_pass.cpp | 19 ++++++++-- src/shader_recompiler/shader_info.h | 1 + src/video_core/buffer_cache/buffer_cache.h | 42 +++++++++++++--------- .../renderer_opengl/gl_compute_pipeline.cpp | 4 ++- .../renderer_opengl/gl_compute_pipeline.h | 1 + .../renderer_opengl/gl_graphics_pipeline.cpp | 21 +++++------ .../renderer_opengl/gl_graphics_pipeline.h | 3 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 6 +++- .../renderer_vulkan/vk_compute_pipeline.h | 2 ++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 14 +++++--- .../renderer_vulkan/vk_graphics_pipeline.h | 3 +- 11 files changed, 78 insertions(+), 38 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6a5243c9f..fb2031fc8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -560,32 +560,45 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GetCbufU32: case IR::Opcode::GetCbufF32: case IR::Opcode::GetCbufU32x2: { - if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { - AddConstantBufferDescriptor(info, index.U32(), 1); - } else { + const IR::Value index{inst.Arg(0)}; + const IR::Value offset{inst.Arg(1)}; + if (!index.IsImmediate()) { throw NotImplementedException("Constant buffer with non-immediate index"); } + AddConstantBufferDescriptor(info, index.U32(), 1); + u32 element_size{}; switch (inst.GetOpcode()) { case IR::Opcode::GetCbufU8: case IR::Opcode::GetCbufS8: info.used_constant_buffer_types |= IR::Type::U8; + element_size = 1; break; case IR::Opcode::GetCbufU16: case IR::Opcode::GetCbufS16: info.used_constant_buffer_types |= IR::Type::U16; + element_size = 2; break; case IR::Opcode::GetCbufU32: info.used_constant_buffer_types |= IR::Type::U32; + element_size = 4; break; case IR::Opcode::GetCbufF32: info.used_constant_buffer_types |= 
IR::Type::F32; + element_size = 4; break; case IR::Opcode::GetCbufU32x2: info.used_constant_buffer_types |= IR::Type::U32x2; + element_size = 8; break; default: break; } + u32& size{info.constant_buffer_used_sizes[index.U32()]}; + if (offset.IsImmediate()) { + size = std::max(size, offset.U32() + element_size); + } else { + size = 0x10'000; + } break; } case IR::Opcode::BindlessImageSampleImplicitLod: diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d5b2ca7bc..32f8a50ea 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -197,6 +197,7 @@ struct Info { IR::Type used_storage_buffer_types{}; u32 constant_buffer_mask{}; + std::array constant_buffer_used_sizes{}; u32 nvn_buffer_base{}; std::bitset<16> nvn_buffer_used{}; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d6b9eb99f..ec64f2293 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -44,6 +44,7 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; using VideoCore::Surface::PixelFormat; +using namespace Common::Literals; constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; @@ -53,7 +54,8 @@ constexpr u32 NUM_STORAGE_BUFFERS = 16; constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; -using namespace Common::Literals; +using UniformBufferSizes = std::array, NUM_STAGES>; +using ComputeUniformBufferSizes = std::array; template class BufferCache { @@ -142,9 +144,10 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(const std::array& mask); + void SetUniformBuffersState(const std::array& mask, + const UniformBufferSizes* sizes); - void SetEnabledComputeUniformBuffers(u32 enabled); + void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); void UnbindGraphicsStorageBuffers(size_t stage); @@ -384,8 +387,11 
@@ private: std::array compute_storage_buffers; std::array compute_texture_buffers; - std::array enabled_uniform_buffers{}; - u32 enabled_compute_uniform_buffers = 0; + std::array enabled_uniform_buffer_masks{}; + u32 enabled_compute_uniform_buffer_mask = 0; + + const UniformBufferSizes* uniform_buffer_sizes{}; + const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; std::array enabled_storage_buffers{}; std::array written_storage_buffers{}; @@ -670,18 +676,22 @@ void BufferCache

::BindHostComputeBuffers() { } template -void BufferCache

::SetEnabledUniformBuffers(const std::array& mask) { +void BufferCache

::SetUniformBuffersState(const std::array& mask, + const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers != mask) { + if (enabled_uniform_buffer_masks != mask) { dirty_uniform_buffers.fill(~u32{0}); } } - enabled_uniform_buffers = mask; + enabled_uniform_buffer_masks = mask; + uniform_buffer_sizes = sizes; } template -void BufferCache

::SetEnabledComputeUniformBuffers(u32 enabled) { - enabled_compute_uniform_buffers = enabled; +void BufferCache

::SetComputeUniformBufferState(u32 mask, + const ComputeUniformBufferSizes* sizes) { + enabled_compute_uniform_buffer_mask = mask; + compute_uniform_buffer_sizes = sizes; } template @@ -984,7 +994,7 @@ void BufferCache

::BindHostGraphicsUniformBuffers(size_t stage) { dirty = std::exchange(dirty_uniform_buffers[stage], 0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { const bool needs_bind = ((dirty >> index) & 1) != 0; BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { @@ -998,7 +1008,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 bool needs_bind) { const Binding& binding = uniform_buffers[stage][index]; const VAddr cpu_addr = binding.cpu_addr; - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && @@ -1113,11 +1123,11 @@ void BufferCache

::BindHostComputeUniformBuffers() { dirty_uniform_buffers.fill(~u32{0}); } u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); @@ -1261,7 +1271,7 @@ void BufferCache

::UpdateVertexBuffer(u32 index) { template void BufferCache

::UpdateUniformBuffers(size_t stage) { - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { Binding& binding = uniform_buffers[stage][index]; if (binding.buffer_id) { // Already updated @@ -1334,7 +1344,7 @@ void BufferCache

::UpdateTransformFeedbackBuffer(u32 index) { template void BufferCache

::UpdateComputeUniformBuffers() { - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { Binding& binding = compute_uniform_buffers[index]; binding = NULL_BINDING; const auto& launch_desc = kepler_compute.launch_description; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 5cf5f97a9..61b6fe4b7 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -43,6 +43,8 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); num_image_buffers = AccumulateCount(info.image_buffer_descriptors); @@ -63,7 +65,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac } void ComputePipeline::Configure() { - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index dd6b62ef2..b5dfb65e9 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -72,6 +72,7 @@ private: Shader::Info info; OGLProgram source_program; 
OGLAssemblyProgram assembly_program; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; u32 num_texture_buffers{}; u32 num_image_buffers{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 976897067..a5d65fdca 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -60,6 +60,14 @@ std::pair TransformFeedbackEnum(u8 location) { UNIMPLEMENTED_MSG("index={}", index); return {GL_POSITION, 0}; } + +struct Spec { + static constexpr std::array enabled_stages{true, true, true, true, true}; + static constexpr bool has_storage_buffers = true; + static constexpr bool has_texture_buffers = true; + static constexpr bool has_image_buffers = true; + static constexpr bool has_images = true; +}; } // Anonymous namespace size_t GraphicsPipelineKey::Hash() const noexcept { @@ -100,7 +108,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); } - enabled_uniform_buffers[stage] = info.constant_buffer_mask; + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; num_texture_buffers[stage] += num_tex_buffer_bindings; @@ -130,14 +139,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } } -struct Spec { - static constexpr std::array enabled_stages{true, true, true, true, true}; - static constexpr bool has_storage_buffers = true; - static constexpr bool has_texture_buffers = true; - static constexpr bool has_image_buffers = true; - static constexpr bool has_images = true; -}; - void 
GraphicsPipeline::Configure(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; @@ -147,7 +148,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); - buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index bf33ce604..508fad5bb 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -99,7 +99,8 @@ private: u32 enabled_stages_mask{}; std::array stage_infos{}; - std::array enabled_uniform_buffers{}; + std::array enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; std::array base_uniform_bindings{}; std::array base_storage_bindings{}; std::array num_texture_buffers{}; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 168ffa7e9..ca59042ff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include #include #include @@ -27,6 +28,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript vk::ShaderModule spv_module_) : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); + auto func{[this, &descriptor_pool] { DescriptorLayoutBuilder builder{device.GetLogical()}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -75,7 +79,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, BufferCache& buffer_cache, TextureCache& texture_cache) { update_descriptor_queue.Acquire(); - buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); + buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); buffer_cache.UnbindComputeStorageBuffers(); size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index a560e382e..a6043866d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -44,6 +44,8 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; + VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; + vk::ShaderModule spv_module; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d381109d6..627ca0158 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -218,10 +218,14 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, 
update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - std::ranges::transform(infos, enabled_uniform_buffers.begin(), [](const Shader::Info* info) { - return info ? info->constant_buffer_mask : 0; - }); - + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + const Shader::Info* const info{infos[stage]}; + if (!info) { + continue; + } + enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; + std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + } auto func{[this, &render_pass_cache, &descriptor_pool] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; descriptor_set_layout = builder.CreateDescriptorSetLayout(); @@ -262,7 +266,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { texture_cache.SynchronizeGraphicsDescriptors(); - buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); + buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4068a0edc..8c81c28a8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -130,7 +130,8 @@ private: std::array spv_modules; std::array stage_infos; - std::array enabled_uniform_buffers{}; + std::array enabled_uniform_buffer_masks{}; + VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; vk::DescriptorSetLayout descriptor_set_layout; DescriptorAllocator descriptor_allocator; -- cgit v1.2.3 From 79f2fe1a39120f498e915fa0c740b15dc0f09793 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 2 Jun 2021 03:02:33 -0300 Subject: glasm: Use 
ARB_derivative_control conditionally --- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 7 +++--- .../backend/glasm/emit_glasm_warp.cpp | 29 +++++++++++++++++++--- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 ++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 6 files changed, 37 insertions(+), 7 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 70ca6f621..fc01797b6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -265,9 +265,7 @@ void SetupOptions(const IR::Program& program, const Profile& profile, // TODO: Track the shared atomic ops header += "OPTION NV_internal;" "OPTION NV_shader_storage_buffer;" - "OPTION NV_gpu_program_fp64;" - "OPTION NV_bindless_texture;" - "OPTION ARB_derivative_control;"; + "OPTION NV_gpu_program_fp64;"; if (info.uses_int64_bit_atomics) { header += "OPTION NV_shader_atomic_int64;"; } @@ -295,6 +293,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { header += "OPTION EXT_shader_image_load_formatted;"; } + if (profile.support_derivative_control) { + header += "OPTION ARB_derivative_control;"; + } if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { header += "OPTION NV_early_fragment_tests;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index 6e30790bb..8cec5ee7e 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -5,6 +5,7 @@ #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" 
#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLASM { @@ -111,19 +112,39 @@ void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDX.FINE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.FINE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDX {}.x,{};", inst, p); + } } void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDY.FINE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.FINE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDY {}.x,{};", inst, p); + } } void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDX.COARSE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDX.COARSE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDX {}.x,{};", inst, p); + } } void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { - ctx.Add("DDY.COARSE {}.x,{};", inst, p); + if (ctx.profile.support_derivative_control) { + ctx.Add("DDY.COARSE {}.x,{};", inst, p); + } else { + // LOG_WARNING + ctx.Add("DDY {}.x,{};", inst, p); + } } } // namespace Shader::Backend::GLASM diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f059e3b26..3109fb69c 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -82,6 +82,7 @@ struct Profile { bool support_typeless_image_loads{}; bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; + bool support_derivative_control{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 01da2bb57..3f7929f9e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ 
b/src/video_core/renderer_opengl/gl_device.cpp @@ -154,6 +154,7 @@ Device::Device() { has_precise_bug = TestPreciseBug(); has_broken_texture_view_formats = is_amd || (!is_linux && is_intel); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + has_derivative_control = GLAD_GL_ARB_derivative_control; has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index d67f5693c..1ffd24883 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -96,6 +96,10 @@ public: return has_nv_viewport_array2; } + bool HasDerivativeControl() const { + return has_derivative_control; + } + bool HasDebuggingToolAttached() const { return has_debugging_tool_attached; } @@ -141,6 +145,7 @@ private: bool has_broken_texture_view_formats{}; bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; + bool has_derivative_control{}; bool has_debugging_tool_attached{}; bool use_assembly_shaders{}; bool use_asynchronous_shaders{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6ea7c0ee8..bdffac4b2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -274,6 +274,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_typeless_image_loads = device.HasImageLoadFormatted(), .support_demote_to_helper_invocation = false, .support_int64_atomics = false, + .support_derivative_control = device.HasDerivativeControl(), .warp_size_potentially_larger_than_guest = true, -- cgit v1.2.3 From 562af301819227d65a251a2c29c997bf798da7ba Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 4 Jun 2021 00:11:16 +0200 
Subject: shader: Fix VertexA Shaders. --- src/shader_recompiler/frontend/maxwell/program.cpp | 19 +++++++++++----- src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | 24 +++++++++++++------- src/shader_recompiler/ir_opt/passes.h | 1 - src/video_core/renderer_opengl/gl_shader_cache.cpp | 26 +++++++++++++++++----- 4 files changed, 51 insertions(+), 19 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 900fc7ab1..8489f9a5f 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp @@ -171,20 +171,29 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b IR::Program result{}; Optimization::VertexATransformPass(vertex_a); Optimization::VertexBTransformPass(vertex_b); - std::swap(result.blocks, vertex_a.blocks); - result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end()); + for (const auto& term : vertex_a.syntax_list) { + if (term.type == IR::AbstractSyntaxNode::Type::Return) { + continue; + } + result.syntax_list.push_back(term); + } + for (const auto& term : vertex_b.syntax_list) { + result.syntax_list.push_back(term); + } + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; } Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); - 
Optimization::DualVertexJoinPass(result); - result.post_order_blocks = PostOrder(result.syntax_list.front()); Optimization::DeadCodeEliminationPass(result); Optimization::VerificationPass(result); Optimization::CollectShaderInfoPass(env_vertex_b, result); diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp index b0a9f5258..a926123f2 100644 --- a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp @@ -13,16 +13,24 @@ namespace Shader::Optimization { -void VertexATransformPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); +void VertexATransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Epilogue) { + return inst.Invalidate(); + } + } + } } -void VertexBTransformPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); -} - -void DualVertexJoinPass(IR::Program&) { - throw NotImplementedException("VertexA pass"); +void VertexBTransformPass(IR::Program& program) { + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (inst.GetOpcode() == IR::Opcode::Prologue) { + return inst.Invalidate(); + } + } + } } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index e9cb8546a..5ebde49ea 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h @@ -25,7 +25,6 @@ void VerificationPass(const IR::Program& program); // Dual Vertex void VertexATransformPass(IR::Program& program); void VertexBTransformPass(IR::Program& program); -void DualVertexJoinPass(IR::Program& program); void JoinTextureInfo(Info& base, Info& source); void JoinStorageInfo(Info& base, Info& source); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp index bdffac4b2..0e4904733 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -40,6 +40,7 @@ namespace OpenGL { namespace { using Shader::Backend::GLASM::EmitGLASM; using Shader::Backend::SPIRV::EmitSPIRV; +using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; @@ -446,6 +447,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( size_t env_index{}; u32 total_storage_buffers{}; std::array programs; + const bool uses_vertex_a{key.unique_hashes[0] != 0}; + const bool uses_vertex_b{key.unique_hashes[1] != 0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; @@ -454,11 +457,22 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( ++env_index; const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; - Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + if (!uses_vertex_a || index != 1) { + // Normal path + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); - for (const auto& desc : programs[index].info.storage_buffers_descriptors) { - total_storage_buffers += desc.count; + for (const auto& desc : programs[index].info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } + } else { + // VertexB path when VertexA is present. 
+ Shader::IR::Program& program_va{programs[0]}; + Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + for (const auto& desc : program_vb.info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } + programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; @@ -472,7 +486,9 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( if (!device.UseAssemblyShaders()) { source_program.handle = glCreateProgram(); } - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + + for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; + ++index) { if (key.unique_hashes[index] == 0) { continue; } -- cgit v1.2.3 From 12fe7210d2b546bd9c5825b6517b80efc818a7fe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 00:35:57 -0300 Subject: gl_shader_cache: Store workers in shader cache object --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 129 ++++++++++++--------- src/video_core/renderer_opengl/gl_shader_cache.h | 7 ++ 2 files changed, 78 insertions(+), 58 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0e4904733..9d6cef6e8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -239,6 +239,15 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs } } // Anonymous namespace +struct ShaderCache::Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; +}; + ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, 
Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, @@ -247,46 +256,49 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo ProgramManager& program_manager_, StateTracker& state_tracker_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, - buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ - state_tracker_} { - profile = Shader::Profile{ - .supported_spirv = 0x00010000, - - .unified_descriptor_binding = false, - .support_descriptor_aliasing = false, - .support_int8 = false, - .support_int16 = false, - .support_vertex_instance_id = true, - .support_float_controls = false, - .support_separate_denorm_behavior = false, - .support_separate_rounding_mode = false, - .support_fp16_denorm_preserve = false, - .support_fp32_denorm_preserve = false, - .support_fp16_denorm_flush = false, - .support_fp32_denorm_flush = false, - .support_fp16_signed_zero_nan_preserve = false, - .support_fp32_signed_zero_nan_preserve = false, - .support_fp64_signed_zero_nan_preserve = false, - .support_explicit_workgroup_layout = false, - .support_vote = true, - .support_viewport_index_layer_non_geometry = - device.HasNvViewportArray2() || device.HasVertexViewportLayer(), - .support_viewport_mask = device.HasNvViewportArray2(), - .support_typeless_image_loads = device.HasImageLoadFormatted(), - .support_demote_to_helper_invocation = false, - .support_int64_atomics = false, - .support_derivative_control = device.HasDerivativeControl(), - - .warp_size_potentially_larger_than_guest = true, - - .lower_left_origin_mode = true, - .need_declared_frag_colors = true, - - .has_broken_spirv_clamp = true, - .has_broken_unsigned_image_offsets = true, - .has_broken_signed_operations = true, - .ignore_nan_fp_comparisons = true, - }; + buffer_cache{buffer_cache_}, program_manager{program_manager_}, 
state_tracker{state_tracker_}, + use_asynchronous_shaders{device.UseAsynchronousShaders()}, + profile{ + .supported_spirv = 0x00010000, + + .unified_descriptor_binding = false, + .support_descriptor_aliasing = false, + .support_int8 = false, + .support_int16 = false, + .support_vertex_instance_id = true, + .support_float_controls = false, + .support_separate_denorm_behavior = false, + .support_separate_rounding_mode = false, + .support_fp16_denorm_preserve = false, + .support_fp32_denorm_preserve = false, + .support_fp16_denorm_flush = false, + .support_fp32_denorm_flush = false, + .support_fp16_signed_zero_nan_preserve = false, + .support_fp32_signed_zero_nan_preserve = false, + .support_fp64_signed_zero_nan_preserve = false, + .support_explicit_workgroup_layout = false, + .support_vote = true, + .support_viewport_index_layer_non_geometry = + device.HasNvViewportArray2() || device.HasVertexViewportLayer(), + .support_viewport_mask = device.HasNvViewportArray2(), + .support_typeless_image_loads = device.HasImageLoadFormatted(), + .support_demote_to_helper_invocation = false, + .support_int64_atomics = false, + .support_derivative_control = device.HasDerivativeControl(), + + .warp_size_potentially_larger_than_guest = true, + + .lower_left_origin_mode = true, + .need_declared_frag_colors = true, + + .has_broken_spirv_clamp = true, + .has_broken_unsigned_image_offsets = true, + .has_broken_signed_operations = true, + .ignore_nan_fp_comparisons = true, + } { + if (use_asynchronous_shaders) { + workers = CreateWorkers(); + } } ShaderCache::~ShaderCache() = default; @@ -307,29 +319,20 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, } shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); - struct Context { - explicit Context(Core::Frontend::EmuWindow& emu_window) - : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} - - std::unique_ptr gl_context; - Core::Frontend::GraphicsContext::Scoped 
scoped; - ShaderPools pools; - }; - Common::StatefulThreadWorker workers( - std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", - [this] { return Context{emu_window}; }); - + if (!workers) { + workers = CreateWorkers(); + } struct { std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; + size_t total{}; + size_t built{}; + bool has_loaded{}; } state; const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { ComputePipelineKey key; file.read(reinterpret_cast(&key), sizeof(key)); - workers.QueueWork( + workers->QueueWork( [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { ctx->pools.ReleaseContents(); auto pipeline{CreateComputePipeline(ctx->pools, key, env)}; @@ -347,7 +350,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, const auto load_graphics{[&](std::ifstream& file, std::vector envs) { GraphicsPipelineKey key; file.read(reinterpret_cast(&key), sizeof(key)); - workers.QueueWork( + workers->QueueWork( [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { boost::container::static_vector env_ptrs; for (auto& env : envs) { @@ -373,7 +376,10 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, state.has_loaded = true; lock.unlock(); - workers.WaitForRequests(); + workers->WaitForRequests(); + if (!use_asynchronous_shaders) { + workers.reset(); + } } GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { @@ -570,4 +576,11 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return nullptr; } +std::unique_ptr> ShaderCache::CreateWorkers() + const { + return std::make_unique>( + std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index cf74d34e4..e0c5a06d8 100644 --- 
a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -12,6 +12,7 @@ #include #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -44,6 +45,8 @@ struct ShaderPools { }; class ShaderCache : public VideoCommon::ShaderCache { + struct Context; + public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -74,6 +77,8 @@ private: const ComputePipelineKey& key, Shader::Environment& env); + std::unique_ptr> CreateWorkers() const; + Core::Frontend::EmuWindow& emu_window; const Device& device; TextureCache& texture_cache; @@ -82,6 +87,7 @@ private: StateTracker& state_tracker; GraphicsPipelineKey graphics_key{}; + const bool use_asynchronous_shaders; ShaderPools main_pools; std::unordered_map> graphics_cache; @@ -89,6 +95,7 @@ private: Shader::Profile profile; std::filesystem::path shader_cache_filename; + std::unique_ptr> workers; }; } // namespace OpenGL -- cgit v1.2.3 From b1ed64ac18fe7b5fc89abe06442527d8c440ddc7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 6 Jun 2021 01:28:22 -0300 Subject: gl_shader_util: Move shader utility code to a separate file --- .../renderer_opengl/gl_resource_manager.cpp | 27 ----- .../renderer_opengl/gl_resource_manager.h | 14 --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 81 +------------- src/video_core/renderer_opengl/gl_shader_util.cpp | 117 ++++++++++++++------- src/video_core/renderer_opengl/gl_shader_util.h | 89 ++-------------- src/video_core/renderer_opengl/renderer_opengl.cpp | 12 +-- src/video_core/renderer_opengl/util_shaders.cpp | 11 +- 7 files changed, 106 insertions(+), 245 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git 
a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 3428e5e21..8695c29e3 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -83,18 +83,6 @@ void OGLSampler::Release() { handle = 0; } -void OGLShader::Create(std::string_view source, GLenum type) { - if (handle != 0) { - return; - } - if (source.empty()) { - return; - } - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - handle = GLShader::LoadShader(source, type); -} - void OGLShader::Release() { if (handle == 0) return; @@ -104,21 +92,6 @@ void OGLShader::Release() { handle = 0; } -void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader, - const char* frag_shader, bool separable_program, - bool hint_retrievable) { - OGLShader vert, geo, frag; - if (vert_shader) - vert.Create(vert_shader, GL_VERTEX_SHADER); - if (geo_shader) - geo.Create(geo_shader, GL_GEOMETRY_SHADER); - if (frag_shader) - frag.Create(frag_shader, GL_FRAGMENT_SHADER); - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle); -} - void OGLProgram::Release() { if (handle == 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 552d79db4..b2d5bfd3b 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -8,7 +8,6 @@ #include #include #include "common/common_types.h" -#include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { @@ -128,8 +127,6 @@ public: return *this; } - void Create(std::string_view source, GLenum type); - void Release(); GLuint handle = 0; @@ -151,17 +148,6 @@ public: return *this; } - template - void Create(bool separable_program, bool hint_retrievable, T... 
shaders) { - if (handle != 0) - return; - handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...); - } - - /// Creates a new internal OpenGL resource and stores the handle - void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, - bool separable_program = false, bool hint_retrievable = false); - /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9d6cef6e8..da0b36368 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -31,6 +31,7 @@ #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_cache.h" #include "video_core/shader_environment.h" @@ -53,77 +54,6 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -void AddShader(GLenum stage, GLuint program, std::span code) { - OGLShader shader; - shader.handle = glCreateShader(stage); - - glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), - static_cast(code.size_bytes())); - glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); - glAttachShader(program, shader.handle); - if (!Settings::values.renderer_debug) { - return; - } - GLint shader_status{}; - glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); - if (shader_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "Failed to build shader"); - } - GLint log_length{}; - glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetShaderInfoLog(shader.handle, log_length, nullptr, 
log.data()); - if (shader_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } -} - -void LinkProgram(GLuint program) { - glLinkProgram(program); - if (!Settings::values.renderer_debug) { - return; - } - GLint link_status{}; - glGetProgramiv(program, GL_LINK_STATUS, &link_status); - - GLint log_length{}; - glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetProgramInfoLog(program, log_length, nullptr, log.data()); - if (link_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } -} - -OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { - OGLAssemblyProgram program; - glGenProgramsARB(1, &program.handle); - glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, - static_cast(code.size()), code.data()); - if (Settings::values.renderer_debug) { - const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); - if (err && *err) { - if (std::strstr(err, "error")) { - LOG_CRITICAL(Render_OpenGL, "\n{}", err); - LOG_INFO(Render_OpenGL, "\n{}", code); - } else { - LOG_WARNING(Render_OpenGL, "\n{}", err); - } - } - } - return program; -} - GLenum Stage(size_t stage_index) { switch (stage_index) { case 0: @@ -492,9 +422,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( if (!device.UseAssemblyShaders()) { source_program.handle = glCreateProgram(); } - - for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; - ++index) { + const size_t first_index = uses_vertex_a && uses_vertex_b ? 
1 : 0; + for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { continue; } @@ -510,7 +439,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; - AddShader(Stage(stage_index), source_program.handle, code); + AttachShader(Stage(stage_index), source_program.handle, code); } } if (!device.UseAssemblyShaders()) { @@ -565,7 +494,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& } else { const std::vector code{EmitSPIRV(profile, program)}; source_program.handle = glCreateProgram(); - AddShader(GL_COMPUTE_SHADER, source_program.handle, code); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 4bf0d6090..99cb81819 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -5,57 +5,100 @@ #include #include #include + #include "common/assert.h" #include "common/logging/log.h" +#include "common/settings.h" #include "video_core/renderer_opengl/gl_shader_util.h" -namespace OpenGL::GLShader { - -namespace { +namespace OpenGL { -std::string_view StageDebugName(GLenum type) { - switch (type) { - case GL_VERTEX_SHADER: - return "vertex"; - case GL_GEOMETRY_SHADER: - return "geometry"; - case GL_FRAGMENT_SHADER: - return "fragment"; - case GL_COMPUTE_SHADER: - return "compute"; +static void LogShader(GLuint shader) { + GLint shader_status{}; + glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "Failed to build shader"); + } + GLint log_length{}; + 
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetShaderInfoLog(shader, log_length, nullptr, log.data()); + if (shader_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); } - UNIMPLEMENTED(); - return "unknown"; } -} // Anonymous namespace +void AttachShader(GLenum stage, GLuint program, std::string_view code) { + OGLShader shader; + shader.handle = glCreateShader(stage); -GLuint LoadShader(std::string_view source, GLenum type) { - const std::string_view debug_type = StageDebugName(type); - const GLuint shader_id = glCreateShader(type); + const GLint length = static_cast(code.size()); + const GLchar* const code_ptr = code.data(); + glShaderSource(shader.handle, 1, &code_ptr, &length); + glCompileShader(shader.handle); + glAttachShader(program, shader.handle); + if (Settings::values.renderer_debug) { + LogShader(shader.handle); + } +} - const GLchar* source_string = source.data(); - const GLint source_length = static_cast(source.size()); +void AttachShader(GLenum stage, GLuint program, std::span code) { + OGLShader shader; + shader.handle = glCreateShader(stage); - glShaderSource(shader_id, 1, &source_string, &source_length); - LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); - glCompileShader(shader_id); + glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), + static_cast(code.size_bytes())); + glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); + glAttachShader(program, shader.handle); + if (Settings::values.renderer_debug) { + LogShader(shader.handle); + } +} + +void LinkProgram(GLuint program) { + glLinkProgram(program); + if (!Settings::values.renderer_debug) { + return; + } + GLint link_status{}; + glGetProgramiv(program, GL_LINK_STATUS, &link_status); - GLint result = GL_FALSE; - GLint info_log_length; - glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); - 
glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); + GLint log_length{}; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } +} - if (info_log_length > 1) { - std::string shader_error(info_log_length, ' '); - glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", shader_error); - } else { - LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { + OGLAssemblyProgram program; + glGenProgramsARB(1, &program.handle); + glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB, + static_cast(code.size()), code.data()); + if (Settings::values.renderer_debug) { + const auto err = reinterpret_cast(glGetString(GL_PROGRAM_ERROR_STRING_NV)); + if (err && *err) { + if (std::strstr(err, "error")) { + LOG_CRITICAL(Render_OpenGL, "\n{}", err); + LOG_INFO(Render_OpenGL, "\n{}", code); + } else { + LOG_WARNING(Render_OpenGL, "\n{}", err); + } } } - return shader_id; + return program; } -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 1b770532e..ff5aa024f 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -4,92 +4,25 @@ #pragma once +#include #include +#include #include + #include + #include "common/assert.h" #include "common/logging/log.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" -namespace OpenGL::GLShader { - -/** - * Utility function to log the source code of a list of shaders. 
- * @param shaders The OpenGL shaders whose source we will print. - */ -template -void LogShaderSource(T... shaders) { - auto shader_list = {shaders...}; - - for (const auto& shader : shader_list) { - if (shader == 0) - continue; - - GLint source_length; - glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length); - - std::string source(source_length, ' '); - glGetShaderSource(shader, source_length, nullptr, &source[0]); - LOG_INFO(Render_OpenGL, "Shader source {}", source); - } -} - -/** - * Utility function to create and compile an OpenGL GLSL shader - * @param source String of the GLSL shader program - * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) - */ -GLuint LoadShader(std::string_view source, GLenum type); - -/** - * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) - * @param separable_program whether to create a separable program - * @param shaders ID of shaders to attach to the program - * @returns Handle of the newly created OpenGL program object - */ -template -GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) { - // Link the program - LOG_DEBUG(Render_OpenGL, "Linking program..."); - - GLuint program_id = glCreateProgram(); - - ((shaders == 0 ? 
(void)0 : glAttachShader(program_id, shaders)), ...); - - if (separable_program) { - glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); - } - if (hint_retrievable) { - glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); - } - - glLinkProgram(program_id); - - // Check the program - GLint result = GL_FALSE; - GLint info_log_length; - glGetProgramiv(program_id, GL_LINK_STATUS, &result); - glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::string program_error(info_log_length, ' '); - glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "{}", program_error); - } else { - LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); - } - } +namespace OpenGL { - if (result == GL_FALSE) { - // There was a problem linking the shader, print the source for debugging purposes. - LogShaderSource(shaders...); - } +void AttachShader(GLenum stage, GLuint program, std::string_view code); - ASSERT_MSG(result == GL_TRUE, "Shader not linked"); +void AttachShader(GLenum stage, GLuint program, std::span code); - ((shaders == 0 ? 
(void)0 : glDetachShader(program_id, shaders)), ...); +void LinkProgram(GLuint program); - return program_id; -} +OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a4805f3da..b8777643b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,6 +24,7 @@ #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/textures/decoders.h" @@ -230,13 +231,10 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs - OGLShader vertex_shader; - vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - - OGLShader fragment_shader; - fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - - present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); + present_program.handle = glCreateProgram(); + AttachShader(GL_VERTEX_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_VERT); + AttachShader(GL_FRAGMENT_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_FRAG); + LinkProgram(present_program.handle); // Generate presentation sampler present_sampler.Create(); diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 51e72b705..8aa0683c8 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -17,6 +17,7 @@ #include 
"video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/util_shaders.h" #include "video_core/texture_cache/accelerated_swizzle.h" @@ -40,13 +41,12 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams; using VideoCore::Surface::BytesPerBlock; namespace { - OGLProgram MakeProgram(std::string_view source) { - OGLShader shader; - shader.Create(source, GL_COMPUTE_SHADER); - OGLProgram program; - program.Create(true, false, shader.handle); + OGLShader shader; + program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, program.handle, source); + LinkProgram(program.handle); return program; } @@ -54,7 +54,6 @@ size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { return static_cast(copy.extent.width * copy.extent.height * copy.src_subresource.num_layers); } - } // Anonymous namespace UtilShaders::UtilShaders(ProgramManager& program_manager_) -- cgit v1.2.3 From 7eaa74ad235b669608debaf3583af94bd675b6c6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 7 Jun 2021 20:43:00 -0300 Subject: gl_texture_cache: Create image storage views Fixes SULD.D tests. 
--- .../renderer_opengl/gl_compute_pipeline.cpp | 5 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 5 +- .../renderer_opengl/gl_texture_cache.cpp | 132 +++++++++++++++------ src/video_core/renderer_opengl/gl_texture_cache.h | 22 +++- 4 files changed, 126 insertions(+), 38 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 61b6fe4b7..a40106c87 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -178,7 +178,10 @@ void ComputePipeline::Configure() { for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); } } if (texture_binding != 0) { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a5d65fdca..a2ea35d5a 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -332,7 +332,10 @@ void GraphicsPipeline::Configure(bool is_indexed) { for (const auto& desc : info.image_descriptors) { for (u32 index = 0; index < desc.count; ++index) { ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; - images[image_binding++] = image_view.Handle(desc.type); + if (desc.is_written) { + texture_cache.MarkModification(image_view.image_id); + } + images[image_binding++] = image_view.StorageView(desc.type, desc.format); } } }}; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 7053be161..c373c9cb4 100644 --- 
a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -328,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } +[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) { + switch (format) { + case Shader::ImageFormat::Typeless: + break; + case Shader::ImageFormat::R8_SINT: + return GL_R8I; + case Shader::ImageFormat::R8_UINT: + return GL_R8UI; + case Shader::ImageFormat::R16_UINT: + return GL_R16UI; + case Shader::ImageFormat::R16_SINT: + return GL_R16I; + case Shader::ImageFormat::R32_UINT: + return GL_R32UI; + case Shader::ImageFormat::R32G32_UINT: + return GL_RG32UI; + case Shader::ImageFormat::R32G32B32A32_UINT: + return GL_RGBA32UI; + } + UNREACHABLE_MSG("Invalid image format={}", format); + return GL_R32UI; +} } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -837,21 +859,28 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI } else { internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; } - VideoCommon::SubresourceRange flatten_range = info.range; - std::array handles; - stored_views.reserve(2); - + full_range = info.range; + flat_range = info.range; + set_object_label = device.HasDebuggingToolAttached(); + is_render_target = info.IsRenderTarget(); + original_texture = image.texture.handle; + num_samples = image.info.num_samples; + if (!is_render_target) { + swizzle[0] = info.x_source; + swizzle[1] = info.y_source; + swizzle[2] = info.z_source; + swizzle[3] = info.w_source; + } switch (info.type) { case ImageViewType::e1DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e1D: - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); + 
SetupView(Shader::TextureType::Color1D); + SetupView(Shader::TextureType::ColorArray1D); break; case ImageViewType::e2DArray: - flatten_range.extent.layers = 1; + flat_range.extent.layers = 1; [[fallthrough]]; case ImageViewType::e2D: if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) { @@ -861,26 +890,23 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI .base = {.level = info.range.base.level, .layer = 0}, .extent = {.levels = 1, .layers = 1}, }; - glGenTextures(1, handles.data()); - SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); + full_range = slice_range; + + SetupView(Shader::TextureType::Color3D); } else { - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, - info.range); + SetupView(Shader::TextureType::Color2D); + SetupView(Shader::TextureType::ColorArray2D); } break; case ImageViewType::e3D: - glGenTextures(1, handles.data()); - SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); + SetupView(Shader::TextureType::Color3D); break; case ImageViewType::CubeArray: - flatten_range.extent.layers = 6; + flat_range.extent.layers = 6; [[fallthrough]]; case ImageViewType::Cube: - glGenTextures(2, handles.data()); - SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); - SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); + SetupView(Shader::TextureType::ColorCube); + SetupView(Shader::TextureType::ColorArrayCube); break; case ImageViewType::Rect: UNIMPLEMENTED(); @@ -928,22 +954,62 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} 
-void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, - GLuint handle, const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range) { - const GLuint parent = image.texture.handle; - const GLenum target = ImageTarget(view_type, image.info.num_samples); - glTextureView(handle, target, parent, internal_format, view_range.base.level, +GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || + image_format == Shader::ImageFormat::R16_SINT}; + if (!storage_views) { + storage_views = std::make_unique(); + } + auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds}; + GLuint& view{type_views[static_cast(texture_type)]}; + if (view == 0) { + view = MakeView(texture_type, ShaderFormat(image_format)); + } + return view; +} + +void ImageView::SetupView(Shader::TextureType view_type) { + views[static_cast(view_type)] = MakeView(view_type, internal_format); +} + +GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) { + VideoCommon::SubresourceRange view_range; + switch (view_type) { + case Shader::TextureType::Color1D: + case Shader::TextureType::Color2D: + case Shader::TextureType::ColorCube: + view_range = flat_range; + break; + case Shader::TextureType::ColorArray1D: + case Shader::TextureType::ColorArray2D: + case Shader::TextureType::Color3D: + case Shader::TextureType::ColorArrayCube: + view_range = full_range; + break; + default: + UNREACHABLE(); + } + OGLTextureView& view = stored_views.emplace_back(); + view.Create(); + + const GLenum target = ImageTarget(view_type, num_samples); + glTextureView(view.handle, target, original_texture, view_format, view_range.base.level, view_range.extent.levels, view_range.base.layer, view_range.extent.layers); - if 
(!info.IsRenderTarget()) { - ApplySwizzle(handle, format, info.Swizzle()); + if (!is_render_target) { + std::array casted_swizzle; + std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) { + return static_cast(component_swizzle); + }); + ApplySwizzle(view.handle, format, casted_swizzle); } - if (device.HasDebuggingToolAttached()) { + if (set_object_label) { const std::string name = VideoCommon::Name(*this); - glObjectLabel(GL_TEXTURE, handle, static_cast(name.size()), name.data()); + glObjectLabel(GL_TEXTURE, view.handle, static_cast(name.size()), name.data()); } - stored_views.emplace_back().handle = handle; - views[static_cast(view_type)] = handle; + return view.handle; } Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 2e3e02b79..921072ebe 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -185,6 +185,9 @@ public: const VideoCommon::ImageViewInfo& view_info); explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); + [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type, + Shader::ImageFormat image_format); + [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { return views[static_cast(handle_type)]; } @@ -206,16 +209,29 @@ public: } private: - void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, - const VideoCommon::ImageViewInfo& info, - VideoCommon::SubresourceRange view_range); + struct StorageViews { + std::array signeds{}; + std::array unsigneds{}; + }; + + void SetupView(Shader::TextureType view_type); + + GLuint MakeView(Shader::TextureType view_type, GLenum view_format); std::array views{}; std::vector stored_views; + std::unique_ptr storage_views; GLenum internal_format = GL_NONE; GLuint default_handle = 0; 
GPUVAddr gpu_addr = 0; u32 buffer_size = 0; + GLuint original_texture = 0; + int num_samples = 0; + VideoCommon::SubresourceRange flat_range; + VideoCommon::SubresourceRange full_range; + std::array swizzle{}; + bool set_object_label = false; + bool is_render_target = false; }; class ImageAlloc : public VideoCommon::ImageAllocBase {}; -- cgit v1.2.3 From 60a96c49e59e600685b9a79d80b2685318b4fb64 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 10 Jun 2021 02:24:12 -0300 Subject: buffer_cache: Fix copy based uniform bindings tracking --- src/video_core/buffer_cache/buffer_cache.h | 19 +++++++++++++++---- src/video_core/renderer_opengl/gl_buffer_cache.h | 12 +++++++----- 2 files changed, 22 insertions(+), 9 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index ec64f2293..47cb0a47d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -680,6 +680,9 @@ void BufferCache

::SetUniformBuffersState(const std::array& m const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { if (enabled_uniform_buffer_masks != mask) { + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers.fill(0); + } dirty_uniform_buffers.fill(~u32{0}); } } @@ -1020,6 +1023,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // Fast path for Nvidia if (!HasFastUniformBufferBound(stage, binding_index)) { // We only have to bind when the currently bound buffer is not the fast version + fast_bound_uniform_buffers[stage] |= 1U << binding_index; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -1027,8 +1031,9 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 return; } } - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size); cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); @@ -1046,9 +1051,15 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // This exists to avoid instances where the fast buffer is bound and a GPU write happens return; } - fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); - const u32 offset = buffer.Offset(cpu_addr); + if constexpr (IS_OPENGL) { + // Fast buffer will be unbound + fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); + + // Mark the index as dirty if offset doesn't match + const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); + dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index bc16abafb..060d36427 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -92,16 +92,14 @@ public: VideoCore::Surface::PixelFormat format); void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { + const GLuint handle = fast_uniforms[stage][binding_index].handle; + const GLsizeiptr gl_size = static_cast(size); if (use_assembly_shaders) { - const GLuint handle = fast_uniforms[stage][binding_index].handle; - const GLsizeiptr gl_size = static_cast(size); glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); } else { const GLuint base_binding = graphics_base_uniform_bindings[stage]; const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - fast_uniforms[stage][binding_index].handle, 0, - static_cast(size)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size); } } @@ -134,6 +132,10 @@ public: return has_fast_buffer_sub_data; } + [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept { + return !use_assembly_shaders; + } + void SetBaseUniformBindings(const std::array& 
bindings) { graphics_base_uniform_bindings = bindings; } -- cgit v1.2.3 From cd8427367ed372e355fa76a78d41b3bc64f997ca Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 10 Jun 2021 01:55:27 -0400 Subject: gl_buffer_cache: Use unorm internal formats for snorm texture buffer views Fixes black textures in UE4 games --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 25 +++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 334ed470f..0703614de 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -25,6 +25,25 @@ constexpr std::array PROGRAM_LUT{ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; + +[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) { + switch (gl_format) { + case GL_RGBA8_SNORM: + return GL_RGBA8; + case GL_R8_SNORM: + return GL_R8; + case GL_RGBA16_SNORM: + return GL_RGBA16; + case GL_R16_SNORM: + return GL_R16; + case GL_RG16_SNORM: + return GL_RG16; + case GL_RG8_SNORM: + return GL_RG8; + default: + return gl_format; + } +} } // Anonymous namespace Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) @@ -76,7 +95,11 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { OGLTexture texture; texture.Create(GL_TEXTURE_BUFFER); const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; - glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); + const GLenum texture_format{GetTextureBufferFormat(gl_format)}; + if (texture_format != gl_format) { + LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM."); + } + glTextureBufferRange(texture.handle, texture_format, buffer.handle, 
offset, size); views.push_back({ .offset = offset, .size = size, -- cgit v1.2.3 From cb78a1b494be2f6bc0927ed5b7a878236a3dc1c0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 12 Jun 2021 01:46:30 -0300 Subject: shader: Reorder shader cache directories --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 13 +++++-------- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 17 +++++++---------- 2 files changed, 12 insertions(+), 18 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index da0b36368..9391a4cd9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -238,16 +238,13 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, if (title_id == 0) { return; } - auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; - auto base_dir{shader_dir / "new_opengl"}; - auto transferable_dir{base_dir / "transferable"}; - auto precompiled_dir{base_dir / "precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories"); return; } - shader_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + shader_cache_filename = base_dir / "opengl.bin"; if (!workers) { workers = CreateWorkers(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 
e61d76490..6df4088a7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -337,22 +337,19 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading if (title_id == 0) { return; } - auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; - auto base_dir{shader_dir / "vulkan"}; - auto transferable_dir{base_dir / "transferable"}; - auto precompiled_dir{base_dir / "precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); return; } - pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); + pipeline_cache_filename = base_dir / "vulkan.bin"; struct { std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; + size_t total{}; + size_t built{}; + bool has_loaded{}; } state; const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { -- cgit v1.2.3 From eaff1030de07f3739794207403ea833ee91c0034 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 19 May 2021 21:58:32 -0400 Subject: glsl: Initial backend --- src/shader_recompiler/CMakeLists.txt | 26 + .../backend/glsl/emit_context.cpp | 30 + src/shader_recompiler/backend/glsl/emit_context.h | 62 + src/shader_recompiler/backend/glsl/emit_glsl.cpp | 156 ++ src/shader_recompiler/backend/glsl/emit_glsl.h | 23 + .../backend/glsl/emit_glsl_atomic.cpp | 0 .../backend/glsl/emit_glsl_barriers.cpp | 0 .../backend/glsl/emit_glsl_bitwise_conversion.cpp | 0 
.../backend/glsl/emit_glsl_composite.cpp | 0 .../backend/glsl/emit_glsl_context_get_set.cpp | 48 + .../backend/glsl/emit_glsl_control_flow.cpp | 0 .../backend/glsl/emit_glsl_convert.cpp | 0 .../backend/glsl/emit_glsl_floating_point.cpp | 0 .../backend/glsl/emit_glsl_image.cpp | 0 .../backend/glsl/emit_glsl_image_atomic.cpp | 0 .../backend/glsl/emit_glsl_instructions.h | 656 ++++++ .../backend/glsl/emit_glsl_integer.cpp | 0 .../backend/glsl/emit_glsl_logical.cpp | 0 .../backend/glsl/emit_glsl_memory.cpp | 0 .../backend/glsl/emit_glsl_not_implemented.cpp | 2149 ++++++++++++++++++++ .../backend/glsl/emit_glsl_select.cpp | 0 .../backend/glsl/emit_glsl_shared_memory.cpp | 0 .../backend/glsl/emit_glsl_special.cpp | 0 .../backend/glsl/emit_glsl_undefined.cpp | 0 .../backend/glsl/emit_glsl_warp.cpp | 0 src/shader_recompiler/backend/glsl/reg_alloc.cpp | 96 + src/shader_recompiler/backend/glsl/reg_alloc.h | 46 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 +- 28 files changed, 3297 insertions(+), 2 deletions(-) create mode 100644 src/shader_recompiler/backend/glsl/emit_context.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_context.h create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl.h create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp create mode 100644 
src/shader_recompiler/backend/glsl/emit_glsl_image.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_instructions.h create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_select.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_special.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp create mode 100644 src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp create mode 100644 src/shader_recompiler/backend/glsl/reg_alloc.cpp create mode 100644 src/shader_recompiler/backend/glsl/reg_alloc.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index d6d8e5f59..9b2240931 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -24,6 +24,32 @@ add_library(shader_recompiler STATIC backend/glasm/emit_glasm_warp.cpp backend/glasm/reg_alloc.cpp backend/glasm/reg_alloc.h + backend/glsl/emit_context.cpp + backend/glsl/emit_context.h + backend/glsl/emit_glsl.cpp + backend/glsl/emit_glsl.h + backend/glsl/emit_glsl_atomic.cpp + backend/glsl/emit_glsl_barriers.cpp + backend/glsl/emit_glsl_bitwise_conversion.cpp + backend/glsl/emit_glsl_composite.cpp + backend/glsl/emit_glsl_context_get_set.cpp + backend/glsl/emit_glsl_control_flow.cpp + backend/glsl/emit_glsl_convert.cpp + backend/glsl/emit_glsl_floating_point.cpp + backend/glsl/emit_glsl_image.cpp + backend/glsl/emit_glsl_image_atomic.cpp + 
backend/glsl/emit_glsl_instructions.h + backend/glsl/emit_glsl_integer.cpp + backend/glsl/emit_glsl_logical.cpp + backend/glsl/emit_glsl_memory.cpp + backend/glsl/emit_glsl_not_implemented.cpp + backend/glsl/emit_glsl_select.cpp + backend/glsl/emit_glsl_shared_memory.cpp + backend/glsl/emit_glsl_special.cpp + backend/glsl/emit_glsl_undefined.cpp + backend/glsl/emit_glsl_warp.cpp + backend/glsl/reg_alloc.cpp + backend/glsl/reg_alloc.h backend/spirv/emit_context.cpp backend/spirv/emit_context.h backend/spirv/emit_spirv.cpp diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp new file mode 100644 index 000000000..e2a9885f0 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -0,0 +1,30 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/frontend/ir/program.h" + +namespace Shader::Backend::GLSL { + +EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindings, + const Profile& profile_) + : info{program.info}, profile{profile_} { + std::string header = "#version 450 core\n"; + header += "layout(local_size_x=1, local_size_y=1, local_size_z=1) in;"; + code += header; + DefineConstantBuffers(); + code += "void main(){"; +} + +void EmitContext::DefineConstantBuffers() { + if (info.constant_buffer_descriptors.empty()) { + return; + } + for (const auto& desc : info.constant_buffer_descriptors) { + Add("uniform uint c{}[{}];", desc.index, desc.count); + } +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h new file mode 100644 index 000000000..ffc97007d --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -0,0 +1,62 @@ +// 
Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "shader_recompiler/backend/glsl/reg_alloc.h" +#include "shader_recompiler/stage.h" + +namespace Shader { +struct Info; +struct Profile; +} // namespace Shader + +namespace Shader::Backend { +struct Bindings; +} + +namespace Shader::IR { +class Inst; +struct Program; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +class EmitContext { +public: + explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_); + + template + void Add(const char* format_str, IR::Inst& inst, Args&&... args) { + code += fmt::format(format_str, reg_alloc.Define(inst), std::forward(args)...); + // TODO: Remove this + code += '\n'; + } + + template + void Add(const char* format_str, Args&&... args) { + code += fmt::format(format_str, std::forward(args)...); + // TODO: Remove this + code += '\n'; + } + + std::string AllocVar() { + return fmt::format("var_{}", var_num++); + } + + std::string code; + RegAlloc reg_alloc; + const Info& info; + const Profile& profile; + u64 var_num{}; + +private: + void DefineConstantBuffers(); +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp new file mode 100644 index 000000000..bb1d8b272 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -0,0 +1,156 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" + +#pragma optimize("", off) +namespace Shader::Backend::GLSL { +namespace { +template +struct FuncTraits {}; + +template +struct FuncTraits { + using ReturnType = ReturnType_; + + static constexpr size_t NUM_ARGS = sizeof...(Args); + + template + using ArgType = std::tuple_element_t>; +}; + +template +void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { + inst->SetDefinition(func(ctx, std::forward(args)...)); +} + +template +ArgType Arg(EmitContext& ctx, const IR::Value& arg) { + if constexpr (std::is_same_v) { + return ctx.reg_alloc.Consume(arg); + } else if constexpr (std::is_same_v) { + return *arg.Inst(); + } else if constexpr (std::is_same_v) { + return arg; + } else if constexpr (std::is_same_v) { + return arg.U32(); + } else if constexpr (std::is_same_v) { + return arg.Attribute(); + } else if constexpr (std::is_same_v) { + return arg.Patch(); + } else if constexpr (std::is_same_v) { + return arg.Reg(); + } +} + +template +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { + using Traits = FuncTraits; + if constexpr (std::is_same_v) { + if constexpr (is_first_arg_inst) { + SetDefinition( + ctx, inst, inst, + Arg>(ctx, inst->Arg(I))...); + } else { + SetDefinition( + ctx, inst, Arg>(ctx, inst->Arg(I))...); + } + } else { + if constexpr (is_first_arg_inst) { + func(ctx, inst, Arg>(ctx, inst->Arg(I))...); + } else { + func(ctx, Arg>(ctx, inst->Arg(I))...); + } + } +} + +template +void Invoke(EmitContext& ctx, IR::Inst* inst) { + using Traits = FuncTraits; + static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); + if constexpr (Traits::NUM_ARGS == 1) { + Invoke(ctx, inst, 
std::make_index_sequence<0>{}); + } else { + using FirstArgType = typename Traits::template ArgType<1>; + static constexpr bool is_first_arg_inst = std::is_same_v; + using Indices = std::make_index_sequence; + Invoke(ctx, inst, Indices{}); + } +} + +void EmitInst(EmitContext& ctx, IR::Inst* inst) { + switch (inst->GetOpcode()) { +#define OPCODE(name, result_type, ...) \ + case IR::Opcode::name: \ + return Invoke<&Emit##name>(ctx, inst); +#include "shader_recompiler/frontend/ir/opcodes.inc" +#undef OPCODE + } + throw LogicError("Invalid opcode {}", inst->GetOpcode()); +} + +void EmitCode(EmitContext& ctx, const IR::Program& program) { + for (const IR::AbstractSyntaxNode& node : program.syntax_list) { + switch (node.type) { + case IR::AbstractSyntaxNode::Type::Block: + for (IR::Inst& inst : node.data.block->Instructions()) { + EmitInst(ctx, &inst); + } + break; + case IR::AbstractSyntaxNode::Type::If: + ctx.Add("if ("); + break; + case IR::AbstractSyntaxNode::Type::EndIf: + ctx.Add("){{"); + break; + case IR::AbstractSyntaxNode::Type::Loop: + ctx.Add("while ("); + break; + case IR::AbstractSyntaxNode::Type::Repeat: + if (node.data.repeat.cond.IsImmediate()) { + if (node.data.repeat.cond.U1()) { + ctx.Add("ENDREP;"); + } else { + ctx.Add("BRK;" + "ENDREP;"); + } + } + break; + case IR::AbstractSyntaxNode::Type::Break: + if (node.data.break_node.cond.IsImmediate()) { + if (node.data.break_node.cond.U1()) { + ctx.Add("break;"); + } + } + break; + case IR::AbstractSyntaxNode::Type::Return: + case IR::AbstractSyntaxNode::Type::Unreachable: + ctx.Add("return;"); + break; + default: + ctx.Add("UNAHNDLED {}", node.type); + break; + } + } +} + +} // Anonymous namespace + +std::string EmitGLSL(const Profile& profile, IR::Program& program, Bindings& bindings) { + EmitContext ctx{program, bindings, profile}; + // ctx.SetupBuffers(); + EmitCode(ctx, program); + ctx.code += "}"; + return ctx.code; +} + +} // namespace Shader::Backend::GLSL diff --git 
a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h new file mode 100644 index 000000000..a7c666107 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h @@ -0,0 +1,23 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "shader_recompiler/backend/bindings.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { + +[[nodiscard]] std::string EmitGLSL(const Profile& profile, IR::Program& program, + Bindings& binding); + +[[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) { + Bindings binding; + return EmitGLSL(profile, program, binding); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp new file mode 100644 index 000000000..7b2ed358e --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -0,0 +1,48 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// 
Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { +void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufU32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + const auto var{ctx.AllocVar()}; + ctx.Add("uint {} = c{}[{}];", var, binding.U32(), offset.U32()); +} + +void EmitGetCbufF32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} + +void EmitGetCbufU32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL"); +} +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp 
b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image_atomic.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h new file mode 100644 index 000000000..1d86820dc --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -0,0 +1,656 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "common/common_types.h" + +namespace Shader::IR { +enum class Attribute : u64; +enum class Patch : u64; +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +class EmitContext; + +inline void EmitSetLoopSafetyVariable(EmitContext&) {} +inline void EmitGetLoopSafetyVariable(EmitContext&) {} + +// Microinstruction emitters +void EmitPhi(EmitContext& ctx, IR::Inst* inst); +void EmitVoid(EmitContext& ctx); +void EmitIdentity(EmitContext& ctx, const IR::Value& value); +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); +void EmitReference(EmitContext&); +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); +void EmitBranch(EmitContext& ctx, std::string_view label); +void EmitBranchConditional(EmitContext& ctx, std::string_view condition, + std::string_view true_label, std::string_view false_label); +void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, std::string_view continue_label); +void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label); +void EmitReturn(EmitContext& ctx); +void EmitJoin(EmitContext& ctx); +void EmitUnreachable(EmitContext& ctx); +void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label); +void EmitBarrier(EmitContext& ctx); +void EmitWorkgroupMemoryBarrier(EmitContext& ctx); +void EmitDeviceMemoryBarrier(EmitContext& ctx); +void EmitPrologue(EmitContext& ctx); +void EmitEpilogue(EmitContext& ctx); +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); +void EmitGetRegister(EmitContext& ctx); +void EmitSetRegister(EmitContext& ctx); +void EmitGetPred(EmitContext& ctx); +void EmitSetPred(EmitContext& ctx); +void EmitSetGotoVariable(EmitContext& ctx); +void EmitGetGotoVariable(EmitContext& ctx); +void EmitSetIndirectBranchVariable(EmitContext& ctx); +void 
EmitGetIndirectBranchVariable(EmitContext& ctx); +void EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex); +void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex); +void EmitGetPatch(EmitContext& ctx, IR::Patch patch); +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); +void EmitSetSampleMask(EmitContext& ctx, std::string_view value); +void EmitSetFragDepth(EmitContext& ctx, std::string_view value); +void EmitGetZFlag(EmitContext& ctx); +void EmitGetSFlag(EmitContext& ctx); +void EmitGetCFlag(EmitContext& ctx); +void EmitGetOFlag(EmitContext& ctx); +void EmitSetZFlag(EmitContext& ctx); +void EmitSetSFlag(EmitContext& ctx); +void EmitSetCFlag(EmitContext& ctx); +void EmitSetOFlag(EmitContext& ctx); +void EmitWorkgroupId(EmitContext& ctx); +void EmitLocalInvocationId(EmitContext& ctx); +void EmitInvocationId(EmitContext& ctx); +void EmitSampleId(EmitContext& ctx); +void EmitIsHelperInvocation(EmitContext& ctx); +void 
EmitYDirection(EmitContext& ctx); +void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset); +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); +void EmitUndefU1(EmitContext& ctx); +void EmitUndefU8(EmitContext& ctx); +void EmitUndefU16(EmitContext& ctx); +void EmitUndefU32(EmitContext& ctx); +void EmitUndefU64(EmitContext& ctx); +void EmitLoadGlobalU8(EmitContext& ctx); +void EmitLoadGlobalS8(EmitContext& ctx); +void EmitLoadGlobalU16(EmitContext& ctx); +void EmitLoadGlobalS16(EmitContext& ctx); +void EmitLoadGlobal32(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal64(EmitContext& ctx, std::string_view address); +void EmitLoadGlobal128(EmitContext& ctx, std::string_view address); +void EmitWriteGlobalU8(EmitContext& ctx); +void EmitWriteGlobalS8(EmitContext& ctx); +void EmitWriteGlobalU16(EmitContext& ctx); +void EmitWriteGlobalS16(EmitContext& ctx); +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); +void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); 
+void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset); +void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset); +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); +void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, 
std::string_view e4); +void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2); +void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3); +void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4); +void 
EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeConstructF64x2(EmitContext& ctx); +void EmitCompositeConstructF64x3(EmitContext& ctx); +void EmitCompositeConstructF64x4(EmitContext& ctx); +void EmitCompositeExtractF64x2(EmitContext& ctx); +void EmitCompositeExtractF64x3(EmitContext& ctx); +void EmitCompositeExtractF64x4(EmitContext& ctx); +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index); +void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view 
false_value); +void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value); +void EmitBitCastU16F16(EmitContext& ctx); +void EmitBitCastU32F32(EmitContext& ctx, std::string_view value); +void EmitBitCastU64F64(EmitContext& ctx); +void EmitBitCastF16U16(EmitContext& ctx); +void EmitBitCastF32U32(EmitContext& ctx, std::string_view value); +void EmitBitCastF64U64(EmitContext& ctx); +void EmitPackUint2x32(EmitContext& ctx, std::string_view value); +void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value); +void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitPackHalf2x16(EmitContext& ctx, std::string_view value); +void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value); +void EmitPackDouble2x32(EmitContext& ctx, std::string_view value); +void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value); +void EmitGetZeroFromOp(EmitContext& ctx); +void EmitGetSignFromOp(EmitContext& ctx); +void EmitGetCarryFromOp(EmitContext& ctx); +void EmitGetOverflowFromOp(EmitContext& ctx); +void EmitGetSparseFromOp(EmitContext& ctx); +void EmitGetInBoundsFromOp(EmitContext& ctx); +void EmitFPAbs16(EmitContext& ctx, std::string_view value); +void EmitFPAbs32(EmitContext& ctx, std::string_view value); +void EmitFPAbs64(EmitContext& ctx, std::string_view value); +void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma32(EmitContext& ctx, IR::Inst* 
inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c); +void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitFPNeg16(EmitContext& ctx, std::string_view value); +void EmitFPNeg32(EmitContext& ctx, std::string_view value); +void EmitFPNeg64(EmitContext& ctx, std::string_view value); +void EmitFPSin(EmitContext& ctx, std::string_view value); +void EmitFPCos(EmitContext& ctx, std::string_view value); +void EmitFPExp2(EmitContext& ctx, std::string_view value); +void EmitFPLog2(EmitContext& ctx, std::string_view value); +void EmitFPRecip32(EmitContext& ctx, std::string_view value); +void EmitFPRecip64(EmitContext& ctx, std::string_view value); +void EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value); +void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value); +void EmitFPSqrt(EmitContext& ctx, std::string_view value); +void EmitFPSaturate16(EmitContext& ctx, std::string_view value); +void EmitFPSaturate32(EmitContext& ctx, std::string_view value); +void EmitFPSaturate64(EmitContext& ctx, std::string_view value); +void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPClamp64(EmitContext& ctx, 
std::string_view value, std::string_view min_value, + std::string_view max_value); +void EmitFPRoundEven16(EmitContext& ctx, std::string_view value); +void EmitFPRoundEven32(EmitContext& ctx, std::string_view value); +void EmitFPRoundEven64(EmitContext& ctx, std::string_view value); +void EmitFPFloor16(EmitContext& ctx, std::string_view value); +void EmitFPFloor32(EmitContext& ctx, std::string_view value); +void EmitFPFloor64(EmitContext& ctx, std::string_view value); +void EmitFPCeil16(EmitContext& ctx, std::string_view value); +void EmitFPCeil32(EmitContext& ctx, std::string_view value); +void EmitFPCeil64(EmitContext& ctx, std::string_view value); +void EmitFPTrunc16(EmitContext& ctx, std::string_view value); +void EmitFPTrunc32(EmitContext& ctx, std::string_view value); +void EmitFPTrunc64(EmitContext& ctx, std::string_view value); +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); 
+void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void 
EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitFPIsNan16(EmitContext& ctx, std::string_view value); +void EmitFPIsNan32(EmitContext& ctx, std::string_view value); +void EmitFPIsNan64(EmitContext& ctx, std::string_view value); +void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitINeg32(EmitContext& ctx, std::string_view value); +void EmitINeg64(EmitContext& ctx, std::string_view value); +void EmitIAbs32(EmitContext& ctx, std::string_view value); +void EmitIAbs64(EmitContext& ctx, std::string_view value); +void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift); +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b); +void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, + std::string_view 
offset, std::string_view count); +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count); +void EmitBitReverse32(EmitContext& ctx, std::string_view value); +void EmitBitCount32(EmitContext& ctx, std::string_view value); +void EmitBitwiseNot32(EmitContext& ctx, std::string_view value); +void EmitFindSMsb32(EmitContext& ctx, std::string_view value); +void EmitFindUMsb32(EmitContext& ctx, std::string_view value); +void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max); +void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs); +void EmitUGreaterThanEqual(EmitContext& ctx, 
std::string_view lhs, std::string_view rhs); +void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicUMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value); +void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void 
EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view 
value); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value); +void EmitGlobalAtomicIAdd32(EmitContext& ctx); +void EmitGlobalAtomicSMin32(EmitContext& ctx); +void EmitGlobalAtomicUMin32(EmitContext& ctx); +void EmitGlobalAtomicSMax32(EmitContext& ctx); +void EmitGlobalAtomicUMax32(EmitContext& ctx); +void EmitGlobalAtomicInc32(EmitContext& ctx); +void EmitGlobalAtomicDec32(EmitContext& ctx); +void EmitGlobalAtomicAnd32(EmitContext& ctx); +void EmitGlobalAtomicOr32(EmitContext& ctx); +void EmitGlobalAtomicXor32(EmitContext& ctx); +void EmitGlobalAtomicExchange32(EmitContext& ctx); +void EmitGlobalAtomicIAdd64(EmitContext& ctx); +void EmitGlobalAtomicSMin64(EmitContext& ctx); +void EmitGlobalAtomicUMin64(EmitContext& ctx); +void EmitGlobalAtomicSMax64(EmitContext& ctx); +void EmitGlobalAtomicUMax64(EmitContext& ctx); +void EmitGlobalAtomicInc64(EmitContext& ctx); +void EmitGlobalAtomicDec64(EmitContext& ctx); +void EmitGlobalAtomicAnd64(EmitContext& ctx); +void EmitGlobalAtomicOr64(EmitContext& ctx); +void EmitGlobalAtomicXor64(EmitContext& ctx); +void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicAddF32(EmitContext& ctx); +void EmitGlobalAtomicAddF16x2(EmitContext& ctx); +void EmitGlobalAtomicAddF32x2(EmitContext& ctx); +void 
EmitGlobalAtomicMinF16x2(EmitContext& ctx); +void EmitGlobalAtomicMinF32x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); +void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b); +void EmitLogicalNot(EmitContext& ctx, std::string_view value); +void EmitConvertS16F16(EmitContext& ctx, std::string_view value); +void EmitConvertS16F32(EmitContext& ctx, std::string_view value); +void EmitConvertS16F64(EmitContext& ctx, std::string_view value); +void EmitConvertS32F16(EmitContext& ctx, std::string_view value); +void EmitConvertS32F32(EmitContext& ctx, std::string_view value); +void EmitConvertS32F64(EmitContext& ctx, std::string_view value); +void EmitConvertS64F16(EmitContext& ctx, std::string_view value); +void EmitConvertS64F32(EmitContext& ctx, std::string_view value); +void EmitConvertS64F64(EmitContext& ctx, std::string_view value); +void EmitConvertU16F16(EmitContext& ctx, std::string_view value); +void EmitConvertU16F32(EmitContext& ctx, std::string_view value); +void EmitConvertU16F64(EmitContext& ctx, std::string_view value); +void EmitConvertU32F16(EmitContext& ctx, std::string_view value); +void EmitConvertU32F32(EmitContext& ctx, std::string_view value); +void EmitConvertU32F64(EmitContext& ctx, std::string_view value); +void EmitConvertU64F16(EmitContext& ctx, std::string_view value); +void EmitConvertU64F32(EmitContext& ctx, std::string_view value); +void EmitConvertU64F64(EmitContext& ctx, std::string_view value); +void EmitConvertU64U32(EmitContext& ctx, std::string_view value); +void EmitConvertU32U64(EmitContext& ctx, std::string_view value); +void EmitConvertF16F32(EmitContext& ctx, std::string_view value); +void EmitConvertF32F16(EmitContext& ctx, std::string_view value); +void 
EmitConvertF32F64(EmitContext& ctx, std::string_view value); +void EmitConvertF64F32(EmitContext& ctx, std::string_view value); +void EmitConvertF16S8(EmitContext& ctx, std::string_view value); +void EmitConvertF16S16(EmitContext& ctx, std::string_view value); +void EmitConvertF16S32(EmitContext& ctx, std::string_view value); +void EmitConvertF16S64(EmitContext& ctx, std::string_view value); +void EmitConvertF16U8(EmitContext& ctx, std::string_view value); +void EmitConvertF16U16(EmitContext& ctx, std::string_view value); +void EmitConvertF16U32(EmitContext& ctx, std::string_view value); +void EmitConvertF16U64(EmitContext& ctx, std::string_view value); +void EmitConvertF32S8(EmitContext& ctx, std::string_view value); +void EmitConvertF32S16(EmitContext& ctx, std::string_view value); +void EmitConvertF32S32(EmitContext& ctx, std::string_view value); +void EmitConvertF32S64(EmitContext& ctx, std::string_view value); +void EmitConvertF32U8(EmitContext& ctx, std::string_view value); +void EmitConvertF32U16(EmitContext& ctx, std::string_view value); +void EmitConvertF32U32(EmitContext& ctx, std::string_view value); +void EmitConvertF32U64(EmitContext& ctx, std::string_view value); +void EmitConvertF64S8(EmitContext& ctx, std::string_view value); +void EmitConvertF64S16(EmitContext& ctx, std::string_view value); +void EmitConvertF64S32(EmitContext& ctx, std::string_view value); +void EmitConvertF64S64(EmitContext& ctx, std::string_view value); +void EmitConvertF64U8(EmitContext& ctx, std::string_view value); +void EmitConvertF64U16(EmitContext& ctx, std::string_view value); +void EmitConvertF64U32(EmitContext& ctx, std::string_view value); +void EmitConvertF64U64(EmitContext& ctx, std::string_view value); +void EmitBindlessImageSampleImplicitLod(EmitContext&); +void EmitBindlessImageSampleExplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); +void 
EmitBindlessImageGather(EmitContext&); +void EmitBindlessImageGatherDref(EmitContext&); +void EmitBindlessImageFetch(EmitContext&); +void EmitBindlessImageQueryDimensions(EmitContext&); +void EmitBindlessImageQueryLod(EmitContext&); +void EmitBindlessImageGradient(EmitContext&); +void EmitBindlessImageRead(EmitContext&); +void EmitBindlessImageWrite(EmitContext&); +void EmitBoundImageSampleImplicitLod(EmitContext&); +void EmitBoundImageSampleExplicitLod(EmitContext&); +void EmitBoundImageSampleDrefImplicitLod(EmitContext&); +void EmitBoundImageSampleDrefExplicitLod(EmitContext&); +void EmitBoundImageGather(EmitContext&); +void EmitBoundImageGatherDref(EmitContext&); +void EmitBoundImageFetch(EmitContext&); +void EmitBoundImageQueryDimensions(EmitContext&); +void EmitBoundImageQueryLod(EmitContext&); +void EmitBoundImageGradient(EmitContext&); +void EmitBoundImageRead(EmitContext&); +void EmitBoundImageWrite(EmitContext&); +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset); +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset); +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset); +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset); +void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2); +void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + 
std::string_view dref); +void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms); +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view lod); +void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords); +void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view derivates, std::string_view offset, + std::string_view lod_clamp); +void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords); +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view color); +void EmitBindlessImageAtomicIAdd32(EmitContext&); +void EmitBindlessImageAtomicSMin32(EmitContext&); +void EmitBindlessImageAtomicUMin32(EmitContext&); +void EmitBindlessImageAtomicSMax32(EmitContext&); +void EmitBindlessImageAtomicUMax32(EmitContext&); +void EmitBindlessImageAtomicInc32(EmitContext&); +void EmitBindlessImageAtomicDec32(EmitContext&); +void EmitBindlessImageAtomicAnd32(EmitContext&); +void EmitBindlessImageAtomicOr32(EmitContext&); +void EmitBindlessImageAtomicXor32(EmitContext&); +void EmitBindlessImageAtomicExchange32(EmitContext&); +void EmitBoundImageAtomicIAdd32(EmitContext&); +void EmitBoundImageAtomicSMin32(EmitContext&); +void EmitBoundImageAtomicUMin32(EmitContext&); +void EmitBoundImageAtomicSMax32(EmitContext&); +void EmitBoundImageAtomicUMax32(EmitContext&); +void EmitBoundImageAtomicInc32(EmitContext&); +void EmitBoundImageAtomicDec32(EmitContext&); +void EmitBoundImageAtomicAnd32(EmitContext&); +void EmitBoundImageAtomicOr32(EmitContext&); +void EmitBoundImageAtomicXor32(EmitContext&); +void EmitBoundImageAtomicExchange32(EmitContext&); +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* 
inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value); +void EmitLaneId(EmitContext& ctx); +void EmitVoteAll(EmitContext& ctx, std::string_view pred); +void EmitVoteAny(EmitContext& ctx, std::string_view pred); +void EmitVoteEqual(EmitContext& ctx, std::string_view pred); +void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred); +void EmitSubgroupEqMask(EmitContext& ctx); +void EmitSubgroupLtMask(EmitContext& ctx); +void EmitSubgroupLeMask(EmitContext& ctx); +void EmitSubgroupGtMask(EmitContext& ctx); +void EmitSubgroupGeMask(EmitContext& ctx); +void 
EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask); +void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask); +void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, + std::string_view swizzle); +void EmitDPdxFine(EmitContext& ctx, std::string_view op_a); +void EmitDPdyFine(EmitContext& ctx, std::string_view op_a); +void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a); +void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a); + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp new file mode 100644 index 000000000..8bd40458b --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -0,0 +1,2149 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer 
to the license.txt file included. + +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/ir/value.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4100) +#endif + +namespace Shader::Backend::GLSL { + +static void NotImplemented() { + throw NotImplementedException("GLSL instruction"); +} + +void EmitPhi(EmitContext& ctx, IR::Inst* inst) { + NotImplemented(); +} + +void EmitVoid(EmitContext& ctx) { + NotImplemented(); +} + +void EmitIdentity(EmitContext& ctx, const IR::Value& value) { + const auto var{ctx.AllocVar()}; + switch (value.Type()) { + case IR::Type::U32: + ctx.Add("uint {}={};", var, value.U32()); + break; + default: + ctx.Add("EmitIdentity {}", value.Type()); + break; + } +} + +void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { + NotImplemented(); +} + +void EmitReference(EmitContext&) { + NotImplemented(); +} + +void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value) { + NotImplemented(); +} + +void EmitBranch(EmitContext& ctx, std::string_view label) { + NotImplemented(); +} + +void EmitBranchConditional(EmitContext& ctx, std::string_view condition, + std::string_view true_label, std::string_view false_label) { + NotImplemented(); +} + +void EmitLoopMerge(EmitContext& ctx, std::string_view merge_label, + std::string_view continue_label) { + NotImplemented(); +} + +void EmitSelectionMerge(EmitContext& ctx, std::string_view merge_label) { + NotImplemented(); +} + +void EmitReturn(EmitContext& ctx) { + NotImplemented(); +} + +void EmitJoin(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUnreachable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label) { + NotImplemented(); +} + +void EmitBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void 
EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + NotImplemented(); +} + +void EmitPrologue(EmitContext& ctx) { + // NotImplemented(); +} + +void EmitEpilogue(EmitContext& ctx) { + // NotImplemented(); +} + +void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { + NotImplemented(); +} + +void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { + NotImplemented(); +} + +void EmitGetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetRegister(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetPred(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetGotoVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetIndirectBranchVariable(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex) { + NotImplemented(); +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + std::string_view vertex) { + NotImplemented(); +} + +void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex) { + NotImplemented(); +} + +void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view vertex) { + NotImplemented(); +} + +void EmitGetPatch(EmitContext& ctx, IR::Patch patch) { + NotImplemented(); +} + +void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { + NotImplemented(); +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { + NotImplemented(); +} + +void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitSetFragDepth(EmitContext& ctx, 
std::string_view value) { + NotImplemented(); +} + +void EmitGetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetZFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetSFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetCFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSetOFlag(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWorkgroupId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLocalInvocationId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitInvocationId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSampleId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitIsHelperInvocation(EmitContext& ctx) { + NotImplemented(); +} + +void EmitYDirection(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadLocal(EmitContext& ctx, std::string_view word_offset) { + NotImplemented(); +} + +void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { + NotImplemented(); +} + +void EmitUndefU1(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitUndefU64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalS8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobalS16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLoadGlobal32(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void EmitLoadGlobal64(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void 
EmitLoadGlobal128(EmitContext& ctx, std::string_view address) { + NotImplemented(); +} + +void EmitWriteGlobalU8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalS8(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalU16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobalS16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { + NotImplemented(); +} + +void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { + NotImplemented(); +} + +void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { + NotImplemented(); +} + +void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { + NotImplemented(); +} + +void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + 
NotImplemented(); +} + +void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + // ctx.Add("c{}[{}]={}", binding.U32() + 2, offset.U32(), 16); + ctx.Add("EmitWriteStorage32"); + // ctx.Add("c{}[{}]={}", binding.U32(), offset.U32(), value); +} + +void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitLoadSharedU8(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedS8(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU16(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedS16(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU32(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU64(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitLoadSharedU128(EmitContext& ctx, std::string_view offset) { + NotImplemented(); +} + +void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { + NotImplemented(); +} + +void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { + 
NotImplemented(); +} + +void EmitCompositeConstructU32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertU32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); 
+} + +void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF32x2(EmitContext& ctx, std::string_view e1, std::string_view e2) { + NotImplemented(); +} + +void EmitCompositeConstructF32x3(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3) { + NotImplemented(); +} + +void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::string_view e2, + std::string_view e3, std::string_view e4) { + NotImplemented(); +} + +void EmitCompositeExtractF32x2(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF32x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeConstructF64x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x3(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeConstructF64x4(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeExtractF64x3(EmitContext& ctx) 
{ + NotImplemented(); +} + +void EmitCompositeExtractF64x4(EmitContext& ctx) { + NotImplemented(); +} + +void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, + u32 index) { + NotImplemented(); +} + +void EmitSelectU1(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectU64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF32(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitSelectF64(EmitContext& ctx, std::string_view cond, std::string_view true_value, + std::string_view false_value) { + NotImplemented(); +} + +void EmitBitCastU16F16(EmitContext& ctx) { + NotImplemented(); +} + +void EmitBitCastU32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCastU64F64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitBitCastF16U16(EmitContext& ctx) { + 
NotImplemented(); +} + +void EmitBitCastF32U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCastF64U64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitPackUint2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackUint2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackFloat2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackHalf2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackHalf2x16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitPackDouble2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitUnpackDouble2x32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitGetZeroFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSignFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetCarryFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetOverflowFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetSparseFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGetInBoundsFromOp(EmitContext& ctx) { + NotImplemented(); +} + +void EmitFPAbs16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPAbs64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void 
EmitFPFma16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPFma32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPFma64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b, + std::string_view c) { + NotImplemented(); +} + +void EmitFPMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMax64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMin64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul16(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPMul64(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitFPNeg16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPNeg64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSin(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCos(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPExp2(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPLog2(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecip32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecip64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void 
EmitFPRecipSqrt32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRecipSqrt64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSqrt(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPSaturate64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPClamp16(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp32(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void EmitFPClamp64(EmitContext& ctx, std::string_view value, std::string_view min_value, + std::string_view max_value) { + NotImplemented(); +} + +void EmitFPRoundEven16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPRoundEven64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPFloor64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPCeil64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPTrunc32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void 
EmitFPTrunc64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordNotEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan32(EmitContext& ctx, std::string_view lhs, 
std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThan64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordLessThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void 
EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitFPIsNan16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFPIsNan64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAdd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitIAdd64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitISub32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitISub64(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitIMul32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitINeg32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitINeg64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAbs32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitIAbs64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitShiftLeftLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftLeftLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightLogical32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightLogical64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void 
EmitShiftRightArithmetic32(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitShiftRightArithmetic64(EmitContext& ctx, std::string_view base, std::string_view shift) { + NotImplemented(); +} + +void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitBitFieldInsert(EmitContext& ctx, std::string_view base, std::string_view insert, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, std::string_view base, + std::string_view offset, std::string_view count) { + NotImplemented(); +} + +void EmitBitReverse32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitCount32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBitwiseNot32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFindSMsb32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitFindUMsb32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitSMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitUMin32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitSMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitUMax32(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void 
EmitSClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max) { + NotImplemented(); +} + +void EmitUClamp32(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view min, + std::string_view max) { + NotImplemented(); +} + +void EmitSLessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitULessThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitIEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSLessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitULessThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitUGreaterThan(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitINotEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitUGreaterThanEqual(EmitContext& ctx, std::string_view lhs, std::string_view rhs) { + NotImplemented(); +} + +void EmitSharedAtomicIAdd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx, 
std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicInc32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicDec32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicOr32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicXor32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicExchange32(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& 
binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, + const 
IR::Value& offset, std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + std::string_view value) { + NotImplemented(); +} + +void EmitGlobalAtomicIAdd32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMin32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMin32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMax32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMax32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicInc32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicDec32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAnd32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicOr32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicXor32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicExchange32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicIAdd64(EmitContext& 
ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMin64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMin64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicSMax64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicUMax64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicInc64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicDec64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAnd64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicOr64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicXor64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicExchange64(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF32(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicAddF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMinF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMinF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMaxF16x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitGlobalAtomicMaxF32x2(EmitContext& ctx) { + NotImplemented(); +} + +void EmitLogicalOr(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalAnd(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalXor(EmitContext& ctx, std::string_view a, std::string_view b) { + NotImplemented(); +} + +void EmitLogicalNot(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS16F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void 
EmitConvertS32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertS64F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU16F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU64U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertU32U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32F64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64F32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S8(EmitContext& ctx, 
std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF16U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF32U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64S64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U8(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U16(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + 
+void EmitConvertF64U32(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitConvertF64U64(EmitContext& ctx, std::string_view value) { + NotImplemented(); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageWrite(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGather(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGatherDref(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageFetch(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageQueryLod(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageGradient(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageRead(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageWrite(EmitContext&) { + NotImplemented(); +} + +void 
EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view bias_lc, const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view dref, + std::string_view lod_lc, const IR::Value& offset) { + NotImplemented(); +} + +void EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { + NotImplemented(); +} + +void EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, const IR::Value& offset, const IR::Value& offset2, + std::string_view dref) { + NotImplemented(); +} + +void EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view offset, std::string_view lod, + std::string_view ms) { + NotImplemented(); +} + +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view lod) { + NotImplemented(); +} + +void EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords) { + NotImplemented(); +} + +void EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view derivates, std::string_view offset, + std::string_view lod_clamp) { + NotImplemented(); +} + +void EmitImageRead(EmitContext& ctx, IR::Inst* inst, const 
IR::Value& index, + std::string_view coords) { + NotImplemented(); +} + +void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view color) { + NotImplemented(); +} + +void EmitBindlessImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBindlessImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicIAdd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMin32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicSMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicUMax32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicInc32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicDec32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicAnd32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicOr32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicXor32(EmitContext&) { + NotImplemented(); +} + +void EmitBoundImageAtomicExchange32(EmitContext&) { + NotImplemented(); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, 
std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, + std::string_view coords, std::string_view value) { + NotImplemented(); +} + +void EmitLaneId(EmitContext& ctx) { + NotImplemented(); +} + +void EmitVoteAll(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitVoteAny(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitVoteEqual(EmitContext& ctx, std::string_view pred) { + 
NotImplemented(); +} + +void EmitSubgroupBallot(EmitContext& ctx, std::string_view pred) { + NotImplemented(); +} + +void EmitSubgroupEqMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupLtMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupLeMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupGtMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitSubgroupGeMask(EmitContext& ctx) { + NotImplemented(); +} + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + NotImplemented(); +} + +void EmitFSwizzleAdd(EmitContext& ctx, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { + NotImplemented(); +} + +void EmitDPdxFine(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdyFine(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdxCoarse(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +void EmitDPdyCoarse(EmitContext& ctx, std::string_view op_a) { + NotImplemented(); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp new file mode 100644 index 000000000..e69de29bb diff --git 
a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp new file mode 100644 index 000000000..e69de29bb diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp new file mode 100644 index 000000000..591a87988 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -0,0 +1,96 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include + +#include "shader_recompiler/backend/glsl/reg_alloc.h" +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/value.h" + +namespace Shader::Backend::GLSL { +namespace { +constexpr std::string_view SWIZZLE = "xyzw"; + +std::string Representation(Id id) { + if (id.is_condition_code != 0) { + throw NotImplementedException("Condition code"); + } + if (id.is_spill != 0) { + throw NotImplementedException("Spilling"); + } + const u32 num_elements{id.num_elements_minus_one + 1}; + const u32 index{static_cast(id.index)}; + if (num_elements == 4) { + return fmt::format("R{}", index); + } else { + return fmt::format("R{}.{}", index, SWIZZLE.substr(id.base_element, num_elements)); + } +} + +std::string MakeImm(const IR::Value& value) { + switch (value.Type()) { + case IR::Type::U1: + return fmt::format("{}", value.U1() ? 
"true" : "false"); + case IR::Type::U32: + return fmt::format("{}", value.U32()); + case IR::Type::F32: + return fmt::format("{}", value.F32()); + case IR::Type::U64: + return fmt::format("{}", value.U64()); + case IR::Type::F64: + return fmt::format("{}", value.F64()); + default: + throw NotImplementedException("Immediate type {}", value.Type()); + } +} +} // Anonymous namespace + +std::string RegAlloc::Define(IR::Inst& inst, u32 num_elements, u32 alignment) { + const Id id{Alloc(num_elements, alignment)}; + inst.SetDefinition(id); + return Representation(id); +} + +std::string RegAlloc::Consume(const IR::Value& value) { + return value.IsImmediate() ? MakeImm(value) : Consume(*value.Inst()); +} + +std::string RegAlloc::Consume(IR::Inst& inst) { + const Id id{inst.Definition()}; + inst.DestructiveRemoveUsage(); + if (!inst.HasUses()) { + Free(id); + } + return Representation(inst.Definition()); +} + +Id RegAlloc::Alloc(u32 num_elements, [[maybe_unused]] u32 alignment) { + for (size_t reg = 0; reg < NUM_REGS; ++reg) { + if (register_use[reg]) { + continue; + } + num_used_registers = std::max(num_used_registers, reg + 1); + register_use[reg] = true; + return Id{ + .base_element = 0, + .num_elements_minus_one = num_elements - 1, + .index = static_cast(reg), + .is_spill = 0, + .is_condition_code = 0, + }; + } + throw NotImplementedException("Register spilling"); +} + +void RegAlloc::Free(Id id) { + if (id.is_spill != 0) { + throw NotImplementedException("Free spill"); + } + register_use[id.index] = false; +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h new file mode 100644 index 000000000..850a93d6a --- /dev/null +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -0,0 +1,46 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include + +#include "common/common_types.h" + +namespace Shader::IR { +class Inst; +class Value; +} // namespace Shader::IR + +namespace Shader::Backend::GLSL { + +struct Id { + u32 base_element : 2; + u32 num_elements_minus_one : 2; + u32 index : 26; + u32 is_spill : 1; + u32 is_condition_code : 1; +}; + +class RegAlloc { +public: + std::string Define(IR::Inst& inst, u32 num_elements = 1, u32 alignment = 1); + + std::string Consume(const IR::Value& value); + +private: + static constexpr size_t NUM_REGS = 4096; + static constexpr size_t NUM_ELEMENTS = 4; + + std::string Consume(IR::Inst& inst); + + Id Alloc(u32 num_elements, u32 alignment); + + void Free(Id id); + + size_t num_used_registers{}; + std::bitset register_use{}; +}; + +} // namespace Shader::Backend::GLSL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9391a4cd9..4387532ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -19,6 +19,7 @@ #include "core/core.h" #include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" +#include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -40,6 +41,7 @@ namespace OpenGL { namespace { using Shader::Backend::GLASM::EmitGLASM; +using Shader::Backend::GLSL::EmitGLSL; using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::MergeDualVertexPrograms; using Shader::Maxwell::TranslateProgram; @@ -435,7 +437,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; 
+ const auto code{EmitGLSL(profile, program, binding)}; + OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } } @@ -489,7 +492,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& const std::string code{EmitGLASM(profile, info, program)}; asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); } else { - const std::vector code{EmitSPIRV(profile, program)}; + const auto code{EmitGLSL(profile, program)}; source_program.handle = glCreateProgram(); AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); -- cgit v1.2.3 From 53667ddd4ebdaa98f9c40ef3aee8efbdb15a0a6f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 10 Jul 2021 17:57:35 -0300 Subject: glsl: Fixup build issues --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4387532ab..602cf025b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -437,7 +437,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { - const auto code{EmitGLSL(profile, program, binding)}; + const auto code{EmitGLSL(profile, runtime_info, program, binding)}; OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } -- cgit v1.2.3 From bd24fa97138ff1e33a7f8d3c30a4f4482a6482a8 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 19:55:40 -0400 Subject: glsl: Query GL Device for FP16 extension support --- src/shader_recompiler/backend/glsl/emit_context.cpp | 9 +++++++-- src/shader_recompiler/profile.h | 2 ++ 
src/video_core/renderer_opengl/gl_device.cpp | 2 ++ src/video_core/renderer_opengl/gl_device.h | 10 ++++++++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ 5 files changed, 23 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 9c3fd44ba..6f769fa10 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -5,6 +5,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { @@ -40,8 +41,12 @@ void EmitContext::SetupExtensions(std::string& header) { header += "#extension NV_shader_atomic_fp16_vector : enable\n"; } if (info.uses_fp16) { - // TODO: AMD - header += "#extension GL_NV_gpu_shader5 : enable\n"; + if (profile.support_gl_nv_gpu_shader_5) { + header += "#extension GL_NV_gpu_shader5 : enable\n"; + } + if (profile.support_gl_amd_gpu_shader_half_float) { + header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; + } } } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3109fb69c..5d269368a 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -83,6 +83,8 @@ struct Profile { bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; bool support_derivative_control{}; + bool support_gl_nv_gpu_shader_5{}; + bool support_gl_amd_gpu_shader_half_float{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3f7929f9e..071133781 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -158,6 +158,8 @@ Device::Device() { 
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); + has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; + has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 1ffd24883..9b9402c29 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -120,6 +120,14 @@ public: return has_depth_buffer_float; } + bool HasNvGpuShader5() const { + return has_nv_gpu_shader_5; + } + + bool HasAmdShaderHalfFloat() const { + return has_amd_shader_half_float; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -151,6 +159,8 @@ private: bool use_asynchronous_shaders{}; bool use_driver_cache{}; bool has_depth_buffer_float{}; + bool has_nv_gpu_shader_5{}; + bool has_amd_shader_half_float{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 602cf025b..e00d01e34 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -217,6 +217,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), + .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), + .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .warp_size_potentially_larger_than_guest = true, -- cgit v1.2.3 From 3d086e6130a2c5f0546ccef3b234c65ef2f0c99b Mon Sep 17 00:00:00 2001 From: ameerj 
<52414509+ameerj@users.noreply.github.com> Date: Wed, 26 May 2021 00:16:20 -0400 Subject: glsl: Implement some attribute getters and setters --- .../backend/glsl/emit_context.cpp | 52 +++++- src/shader_recompiler/backend/glsl/emit_context.h | 9 + .../backend/glsl/emit_glsl_composite.cpp | 14 +- .../backend/glsl/emit_glsl_context_get_set.cpp | 53 ++++++ .../backend/glsl/emit_glsl_image.cpp | 205 +++++++++++++++++++++ .../backend/glsl/emit_glsl_instructions.h | 9 +- .../backend/glsl/emit_glsl_not_implemented.cpp | 178 +----------------- src/shader_recompiler/backend/glsl/reg_alloc.cpp | 6 + src/shader_recompiler/backend/glsl/reg_alloc.h | 2 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - 10 files changed, 337 insertions(+), 192 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 7b6c6d22b..8e5983909 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -14,17 +14,63 @@ EmitContext::EmitContext(IR::Program& program, [[maybe_unused]] Bindings& bindin : info{program.info}, profile{profile_} { std::string header = "#version 450\n"; SetupExtensions(header); - if (program.stage == Stage::Compute) { + stage = program.stage; + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + stage_name = "vertex"; + attrib_name = "vertex"; + // TODO: add only what's used by the shader + header += + "out gl_PerVertex {vec4 gl_Position;float gl_PointSize;float gl_ClipDistance[];};"; + break; + case Stage::TessellationControl: + case Stage::TessellationEval: + stage_name = "primitive"; + attrib_name = "primitive"; + break; + case Stage::Geometry: + stage_name = "primitive"; + attrib_name = "vertex"; + break; + case Stage::Fragment: + stage_name = "fragment"; + attrib_name = "fragment"; + break; + case Stage::Compute: + stage_name = "invocation"; header += 
fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;\n", program.workgroup_size[0], program.workgroup_size[1], program.workgroup_size[2]); + break; } code += header; - + const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; + for (size_t index = 0; index < info.input_generics.size(); ++index) { + const auto& generic{info.input_generics[index]}; + if (generic.used) { + Add("layout(location={})in vec4 in_attr{};", index, index); + } + } + for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { + if (!info.stores_frag_color[index]) { + continue; + } + Add("layout(location={})out vec4 frag_color{};", index, index); + } + for (size_t index = 0; index < info.stores_generics.size(); ++index) { + if (info.stores_generics[index]) { + Add("layout(location={}) out vec4 out_attr{};", index, index); + } + } DefineConstantBuffers(); DefineStorageBuffers(); DefineHelperFunctions(); - code += "void main(){\n"; + Add("void main(){{"); + + if (stage == Stage::VertexA || stage == Stage::VertexB) { + Add("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); + } } void EmitContext::SetupExtensions(std::string& header) { diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 7f8857fa7..087eaff6a 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -88,6 +88,11 @@ public: Add(format_str, inst, args...); } + template + void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + template void Add(const char* format_str, Args&&... 
args) { code += fmt::format(format_str, std::forward(args)...); @@ -100,6 +105,10 @@ public: const Info& info; const Profile& profile; + Stage stage{}; + std::string_view stage_name = "invalid"; + std::string_view attrib_name = "invalid"; + private: void SetupExtensions(std::string& header); void DefineConstantBuffers(); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 8e7ad68bd..048b12f38 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -155,16 +155,14 @@ void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_vie ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeExtractF32x3([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } -void EmitCompositeExtractF32x4([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view composite, - [[maybe_unused]] u32 index) { - throw NotImplementedException("GLSL Instruction"); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index) { + ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); } void EmitCompositeInsertF32x2([[maybe_unused]] EmitContext& ctx, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 7c9cadd7e..441818c0b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -51,4 +51,57 @@ void EmitGetCbufU32x2([[maybe_unused]] EmitContext& ctx, 
[[maybe_unused]] const [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); } + +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + [[maybe_unused]] std::string_view vertex) { + const u32 element{static_cast(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + ctx.AddF32("{}=in_attr{}.{};", inst, index, swizzle); + return; + } + switch (attr) { + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.AddF32("{}=gl_Position.{};", inst, swizzle); + break; + default: + fmt::print("Get attribute {}", attr); + throw NotImplementedException("Get attribute {}", attr); + } +} + +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, + [[maybe_unused]] std::string_view vertex) { + const u32 element{static_cast(attr) % 4}; + const char swizzle{"xyzw"[element]}; + if (IR::IsGeneric(attr)) { + const u32 index{IR::GenericAttributeIndex(attr)}; + ctx.Add("out_attr{}.{}={};", index, swizzle, value); + return; + } + switch (attr) { + case IR::Attribute::PointSize: + ctx.Add("gl_Pointsize={};", value); + break; + case IR::Attribute::PositionX: + case IR::Attribute::PositionY: + case IR::Attribute::PositionZ: + case IR::Attribute::PositionW: + ctx.Add("gl_Position.{}={};", swizzle, value); + break; + default: + fmt::print("Set attribute {}", attr); + throw NotImplementedException("Set attribute {}", attr); + } +} + +void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { + const char swizzle{"xyzw"[component]}; + ctx.Add("frag_color{}.{}={};", index, swizzle, value); +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index e69de29bb..109938e0e 100644 --- 
a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -0,0 +1,205 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "shader_recompiler/backend/glsl/emit_context.h" +#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" +#include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" + +namespace Shader::Backend::GLSL { + +void EmitImageSampleImplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view bias_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageSampleExplicitLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view lod_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view dref, + [[maybe_unused]] std::string_view bias_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view dref, + [[maybe_unused]] std::string_view lod_lc, + [[maybe_unused]] const IR::Value& offset) { + throw NotImplementedException("GLSL Instruction"); +} + +void 
EmitImageGather([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] const IR::Value& offset2) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageGatherDref([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] const IR::Value& offset, + [[maybe_unused]] const IR::Value& offset2, + [[maybe_unused]] std::string_view dref) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageFetch([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view offset, [[maybe_unused]] std::string_view lod, + [[maybe_unused]] std::string_view ms) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageQueryDimensions([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view lod) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageQueryLod([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageGradient([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view derivates, + [[maybe_unused]] std::string_view offset, + [[maybe_unused]] std::string_view lod_clamp) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageRead([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + 
[[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitImageWrite([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& index, + [[maybe_unused]] std::string_view coords, + [[maybe_unused]] std::string_view color) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageGather(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageGatherDref(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageFetch(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageQueryDimensions(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageQueryLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageGradient(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageRead(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBindlessImageWrite(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageSampleImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageSampleExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void 
EmitBoundImageSampleDrefImplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageGather(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageGatherDref(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageFetch(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageQueryDimensions(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageQueryLod(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageGradient(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageRead(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +void EmitBoundImageWrite(EmitContext&) { + throw NotImplementedException("GLSL Instruction"); +} + +} // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 9f32070b0..49ab108bb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -61,7 +61,8 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex); +void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, + std::string_view vertex); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, std::string_view vertex); void 
EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex); @@ -180,8 +181,10 @@ void EmitCompositeConstructF32x4(EmitContext& ctx, std::string_view e1, std::str std::string_view e3, std::string_view e4); void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, u32 index); -void EmitCompositeExtractF32x3(EmitContext& ctx, std::string_view composite, u32 index); -void EmitCompositeExtractF32x4(EmitContext& ctx, std::string_view composite, u32 index); +void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); +void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, + u32 index); void EmitCompositeInsertF32x2(EmitContext& ctx, std::string_view composite, std::string_view object, u32 index); void EmitCompositeInsertF32x3(EmitContext& ctx, std::string_view composite, std::string_view object, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index b37b3c76d..14a2edd74 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -83,7 +83,7 @@ void EmitUnreachable(EmitContext& ctx) { } void EmitDemoteToHelperInvocation(EmitContext& ctx, std::string_view continue_label) { - NotImplemented(); + ctx.Add("discard;"); } void EmitBarrier(EmitContext& ctx) { @@ -146,15 +146,6 @@ void EmitGetIndirectBranchVariable(EmitContext& ctx) { NotImplemented(); } -void EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view vertex) { - NotImplemented(); -} - -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, - std::string_view vertex) { - NotImplemented(); -} - void EmitGetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view vertex) { NotImplemented(); } @@ -172,10 +163,6 @@ void 
EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { NotImplemented(); } -void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { - NotImplemented(); -} - void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { NotImplemented(); } @@ -456,169 +443,6 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, std::string_view pointer_offse NotImplemented(); } -void EmitBindlessImageSampleImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageSampleExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGather(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGatherDref(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageFetch(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageQueryDimensions(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageQueryLod(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageGradient(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageRead(EmitContext&) { - NotImplemented(); -} - -void EmitBindlessImageWrite(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageGather(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageGatherDref(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageFetch(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageQueryDimensions(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageQueryLod(EmitContext&) { - 
NotImplemented(); -} - -void EmitBoundImageGradient(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageRead(EmitContext&) { - NotImplemented(); -} - -void EmitBoundImageWrite(EmitContext&) { - NotImplemented(); -} - -void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view bias_lc, - const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view lod_lc, - const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view bias_lc, const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view dref, - std::string_view lod_lc, const IR::Value& offset) { - NotImplemented(); -} - -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { - NotImplemented(); -} - -void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, const IR::Value& offset, const IR::Value& offset2, - std::string_view dref) { - NotImplemented(); -} - -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view offset, std::string_view lod, - std::string_view ms) { - NotImplemented(); -} - -void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view lod) { - NotImplemented(); -} - -void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords) { - NotImplemented(); -} - -void EmitImageGradient(EmitContext& ctx, IR::Inst& 
inst, const IR::Value& index, - std::string_view coords, std::string_view derivates, std::string_view offset, - std::string_view lod_clamp) { - NotImplemented(); -} - -void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords) { - NotImplemented(); -} - -void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, - std::string_view coords, std::string_view color) { - NotImplemented(); -} - void EmitBindlessImageAtomicIAdd32(EmitContext&) { NotImplemented(); } diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index c60a87d91..a080d5341 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -55,6 +55,8 @@ std::string MakeImm(const IR::Value& value) { return fmt::format("{}ul", value.U64()); case IR::Type::F64: return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); + case IR::Type::Void: + return ""; default: throw NotImplementedException("Immediate type {}", value.Type()); } @@ -131,6 +133,10 @@ std::string RegAlloc::GetType(Type type, u32 index) { return "uvec2 "; case Type::F32x2: return "vec2 "; + case Type::U32x4: + return "uvec4 "; + case Type::F32x4: + return "vec4 "; case Type::Void: return ""; default: diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index 419e1e761..df067d3ad 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -27,6 +27,8 @@ enum class Type : u32 { F64, U32x2, F32x2, + U32x4, + F32x4, Void, }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e00d01e34..8a052851b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -440,7 +440,6 @@ std::unique_ptr 
ShaderCache::CreateGraphicsPipeline( assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { const auto code{EmitGLSL(profile, runtime_info, program, binding)}; - OGLShader shader; AttachShader(Stage(stage_index), source_program.handle, code); } } -- cgit v1.2.3 From e35ffbbeb0f85f676416fcb8f0bb0207671f379d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 30 May 2021 00:53:26 -0400 Subject: glsl: Implement VOTE for subgroup size potentially larger --- .../backend/glsl/emit_context.cpp | 12 ++++-- .../backend/glsl/emit_glsl_warp.cpp | 43 ++++++++++++++-------- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 5 files changed, 43 insertions(+), 20 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 5456d4e5b..c6325e55f 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -122,9 +122,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_separate_shader_objects : enable\n"; - header += "#extension GL_ARB_sparse_texture2 : enable\n"; - header += "#extension GL_EXT_texture_shadow_lod : enable\n"; - // header += "#extension GL_ARB_texture_cube_map_array : enable\n"; + if (stage != Stage::Compute) { + // TODO: track this usage + header += "#extension GL_ARB_sparse_texture2 : enable\n"; + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; + } if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } @@ -149,6 +151,10 @@ void EmitContext::SetupExtensions(std::string&) { info.uses_subgroup_shuffles || info.uses_fswzadd) { header += "#extension 
GL_ARB_shader_ballot : enable\n"; header += "#extension GL_ARB_shader_group_vote : enable\n"; + header += "#extension GL_KHR_shader_subgroup_basic : enable\n"; + if (!info.uses_int64) { + header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; + } } } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index e462c977c..8a018acb5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -42,31 +42,42 @@ void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in } void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); - // TODO: - // if (ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); + } } void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - ctx.AddU1("{}=anyInvocationARB({});", inst, pred); - // TODO: - // if (ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=anyInvocationARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); + } } void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); - // TODO: - // if 
(ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); + } else { + const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; + const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; + const auto value{fmt::format("({}^{})", ballot, active_mask)}; + ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); + } } void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { - ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); - // TODO: - // if (ctx.profile.warp_size_potentially_larger_than_guest) { - // } + if (!ctx.profile.warp_size_potentially_larger_than_guest) { + ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); + } else { + ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubgroupInvocationID];", inst, pred); + } } void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 071133781..20ea42cff 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; + warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 9b9402c29..ff0ff2b08 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -128,6 +128,10 @@ public: return has_amd_shader_half_float; } + bool 
IsWarpSizePotentiallyLargerThanGuest() const { + return warp_size_potentially_larger_than_guest; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -161,6 +165,7 @@ private: bool has_depth_buffer_float{}; bool has_nv_gpu_shader_5{}; bool has_amd_shader_half_float{}; + bool warp_size_potentially_larger_than_guest{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 8a052851b..cd11ff653 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -220,7 +220,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), - .warp_size_potentially_larger_than_guest = true, + .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), .lower_left_origin_mode = true, .need_declared_frag_colors = true, -- cgit v1.2.3 From f4799e8fa15b92d8d5607dc5dfca4974901ee06c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 2 Jun 2021 00:33:03 -0400 Subject: glsl: Implement transform feedback --- .../backend/glsl/emit_context.cpp | 53 ++++++++++++++++++---- src/shader_recompiler/backend/glsl/emit_context.h | 8 ++++ .../backend/glsl/emit_glsl_context_get_set.cpp | 15 ++++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 18 ++++++-- 4 files changed, 76 insertions(+), 18 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 6f10002fe..58355d5e3 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -37,7 +37,6 @@ bool StoresPerVertexAttributes(Stage stage) { case Stage::VertexA: 
case Stage::VertexB: case Stage::Geometry: - case Stage::TessellationControl: case Stage::TessellationEval: return true; default: @@ -154,9 +153,7 @@ void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { return; } header += "out gl_PerVertex{"; - if (info.stores_position) { - header += "vec4 gl_Position;"; - } + header += "vec4 gl_Position;"; if (info.stores_point_size) { header += "float gl_PointSize;"; } @@ -236,10 +233,8 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } for (size_t index = 0; index < info.stores_generics.size(); ++index) { // TODO: Properly resolve attribute issues - const auto declaration{fmt::format("layout(location={}) out vec4 out_attr{}{};", index, - index, OutputDecorator(stage, program.invocations))}; if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { - header += declaration; + DefineGenericOutput(index, program.invocations); } } header += "\n"; @@ -312,13 +307,53 @@ void EmitContext::DefineStorageBuffers(Bindings& bindings) { } } +void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { + static constexpr std::string_view swizzle{"xyzw"}; + const size_t base_index{static_cast(IR::Attribute::Generic0X) + index * 4}; + u32 element{0}; + while (element < 4) { + std::string definition{fmt::format("layout(location={}", index)}; + const u32 remainder{4 - element}; + const TransformFeedbackVarying* xfb_varying{}; + if (!runtime_info.xfb_varyings.empty()) { + xfb_varying = &runtime_info.xfb_varyings[base_index + element]; + xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; + } + const u32 num_components{xfb_varying ? 
xfb_varying->components : remainder}; + if (element > 0) { + definition += fmt::format(",component={}", element); + } + if (xfb_varying) { + definition += + fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer, + xfb_varying->stride, xfb_varying->offset); + } + std::string name{fmt::format("out_attr{}", index)}; + if (num_components < 4 || element > 0) { + name += fmt::format("_{}", swizzle.substr(element, num_components)); + } + const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)}; + definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations)); + header += definition; + + const GenericElementInfo element_info{ + .name = name, + .first_element = element, + .num_components = num_components, + }; + std::fill_n(output_generics[index].begin() + element, num_components, element_info); + element += num_components; + } + header += "\n"; +} + void EmitContext::DefineHelperFunctions() { if (info.uses_global_increment || info.uses_shared_increment) { header += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n"; } if (info.uses_global_decrement || info.uses_shared_decrement) { - header += - "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; + header += "uint CasDecrement(uint op_a,uint " + "op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; } if (info.uses_atomic_f32_add) { header += "uint CasFloatAdd(uint op_a,float op_b){return " diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 48786a2c7..5d48675e6 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -30,6 +30,12 @@ struct Program; namespace Shader::Backend::GLSL { +struct GenericElementInfo { + std::string name{}; + u32 first_element{}; + u32 num_components{}; +}; + class EmitContext { public: explicit EmitContext(IR::Program& 
program, Bindings& bindings, const Profile& profile_, @@ -149,6 +155,7 @@ public: std::vector image_buffer_bindings; std::vector texture_bindings; std::vector image_bindings; + std::array, 32> output_generics{}; bool uses_y_direction{}; bool uses_cc_carry{}; @@ -157,6 +164,7 @@ private: void SetupExtensions(std::string& header); void DefineConstantBuffers(Bindings& bindings); void DefineStorageBuffers(Bindings& bindings); + void DefineGenericOutput(size_t index, u32 invocations); void DefineHelperFunctions(); void SetupImages(Bindings& bindings); }; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 0cf31329d..c48492a17 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -200,13 +200,21 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, [[maybe_unused]] std::string_view vertex) { - const u32 element{static_cast(attr) % 4}; - const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - ctx.Add("out_attr{}{}.{}={};", index, OutputVertexIndex(ctx, vertex), swizzle, value); + const u32 element{IR::GenericAttributeElement(attr)}; + const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; + const auto output_decorator{OutputVertexIndex(ctx, vertex)}; + if (info.num_components == 1) { + ctx.Add("{}{}={};", info.name, output_decorator, value); + } else { + const u32 index_element{element - info.first_element}; + ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value); + } return; } + const u32 element{static_cast(attr) % 4}; + const char swizzle{"xyzw"[element]}; switch (attr) { case IR::Attribute::PointSize: ctx.Add("gl_PointSize={};", value); @@ -233,7 +241,6 @@ void 
EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val break; } default: - fmt::print("Set attribute {}", attr); throw NotImplementedException("Set attribute {}", attr); } } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cd11ff653..0a1ba363b 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -92,9 +92,15 @@ GLenum AssemblyStage(size_t stage_index) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, - bool glasm_use_storage_buffers) { + bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; switch (program.stage) { + case Shader::Stage::VertexB: + case Shader::Stage::Geometry: + if (!use_assembly_shaders && key.xfb_enabled != 0) { + info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); + } + break; case Shader::Stage::TessellationEval: info.tess_clockwise = key.tessellation_clockwise != 0; info.tess_primitive = [&key] { @@ -420,7 +426,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array assembly_programs; Shader::Backend::Bindings binding; - if (!device.UseAssemblyShaders()) { + const bool use_glasm{device.UseAssemblyShaders()}; + if (!use_glasm) { source_program.handle = glCreateProgram(); } const size_t first_index = uses_vertex_a && uses_vertex_b ? 
1 : 0; @@ -434,8 +441,9 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)}; - if (device.UseAssemblyShaders()) { + const auto runtime_info{ + MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; + if (use_glasm) { const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); } else { @@ -443,7 +451,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( AttachShader(Stage(stage_index), source_program.handle, code); } } - if (!device.UseAssemblyShaders()) { + if (!use_glasm) { LinkProgram(source_program.handle); } return std::make_unique( -- cgit v1.2.3 From 6577a63d368afa57d5f29df40e524af30eaabffa Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 2 Jun 2021 00:48:49 -0400 Subject: glsl: skip gl_ViewportIndex write if device does not support it --- src/shader_recompiler/backend/glsl/emit_context.cpp | 18 ++++++++++-------- src/shader_recompiler/backend/glsl/emit_context.h | 1 + .../backend/glsl/emit_glsl_context_get_set.cpp | 5 +++++ src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 5 files changed, 18 insertions(+), 8 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 58355d5e3..846d38bfc 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -148,23 +148,24 @@ std::string_view OutputPrimitive(OutputTopology topology) { throw InvalidArgument("Invalid output topology {}", topology); } -void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { - if (!StoresPerVertexAttributes(stage)) { 
+void SetupOutPerVertex(EmitContext& ctx, std::string& header) { + if (!StoresPerVertexAttributes(ctx.stage)) { return; } header += "out gl_PerVertex{"; header += "vec4 gl_Position;"; - if (info.stores_point_size) { + if (ctx.info.stores_point_size) { header += "float gl_PointSize;"; } - if (info.stores_clip_distance) { + if (ctx.info.stores_clip_distance) { header += "float gl_ClipDistance[];"; } - if (info.stores_viewport_index && stage != Stage::Geometry) { + if (ctx.info.stores_viewport_index && ctx.supports_viewport_layer && + ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } header += "};\n"; - if (info.stores_viewport_index && stage == Stage::Geometry) { + if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } @@ -173,6 +174,7 @@ void SetupOutPerVertex(Stage stage, const Info& info, std::string& header) { EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { + supports_viewport_layer = profile.support_gl_vertex_viewport_layer; SetupExtensions(header); stage = program.stage; switch (program.stage) { @@ -206,7 +208,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile program.workgroup_size[2]); break; } - SetupOutPerVertex(stage, info, header); + SetupOutPerVertex(*this, header); for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; if (generic.used) { @@ -276,7 +278,7 @@ void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } } - if (info.stores_viewport_index && stage != Stage::Geometry) { + if (info.stores_viewport_index && supports_viewport_layer && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } } diff --git 
a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 5d48675e6..26a76f8a3 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -159,6 +159,7 @@ public: bool uses_y_direction{}; bool uses_cc_carry{}; + bool supports_viewport_layer{}; private: void SetupExtensions(std::string& header); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index c48492a17..ebaf50abd 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -226,6 +226,11 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val ctx.Add("gl_Position.{}={};", swizzle, value); break; case IR::Attribute::ViewportIndex: + if (ctx.stage != Stage::Geometry && !ctx.supports_viewport_layer) { + // LOG_WARNING(..., "Shader stores viewport index but device does not support viewport + // layer extension"); + break; + } ctx.Add("gl_ViewportIndex=floatBitsToInt({});", value); break; case IR::Attribute::ClipDistance0: diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 5d269368a..420117132 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -85,6 +85,7 @@ struct Profile { bool support_derivative_control{}; bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; + bool support_gl_vertex_viewport_layer{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0a1ba363b..77681594a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -225,6 +225,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, 
Core::Frontend::EmuWindo .support_derivative_control = device.HasDerivativeControl(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), + .support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 747b8556a4611791c1b0afbb500c77de57adfc54 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 4 Jun 2021 00:46:46 -0400 Subject: glsl: Use textureGrad fallback when EXT_texture_shadow_lod is unsupported --- .../backend/glsl/emit_context.cpp | 4 +- .../backend/glsl/emit_glsl_image.cpp | 44 ++++++++++++++++++---- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 4 files changed, 42 insertions(+), 8 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index ecc7335ba..76cf0bdf0 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -282,8 +282,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile void EmitContext::SetupExtensions(std::string&) { // TODO: track this usage header += "#extension GL_ARB_sparse_texture2 : enable\n" - "#extension GL_EXT_texture_shadow_lod : enable\n" "#extension GL_EXT_shader_image_load_formatted : enable\n"; + if (profile.support_gl_texture_shadow_lod) { + header += "#extension GL_EXT_texture_shadow_lod : enable\n"; + } if (info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index a62e2b181..6cf0300ab 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ 
b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { @@ -67,14 +68,14 @@ std::string TexelFetchCastToInt(std::string_view value, const IR::TextureInstInf } } -std::string ShadowSamplerVecCast(TextureType type) { +bool NeedsShadowLodExt(TextureType type) { switch (type) { case TextureType::ColorArray2D: case TextureType::ColorCube: case TextureType::ColorArrayCube: - return "vec4"; + return true; default: - return "vec3"; + return false; } } @@ -221,7 +222,22 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx, } const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; - const auto cast{ShadowSamplerVecCast(info.type)}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && + ctx.stage != Stage::Fragment && needs_shadow_ext}; + if (use_grad) { + // LOG_WARNING(..., "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + // LOG_WARNING(..., "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? 
"vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } if (!offset.IsEmpty()) { const auto offset_str{GetOffsetVec(ctx, offset)}; if (ctx.stage == Stage::Fragment) { @@ -263,15 +279,29 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx, throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); } const auto texture{Texture(ctx, info, index)}; - const auto cast{ShadowSamplerVecCast(info.type)}; + const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; + const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; + const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; + if (use_grad) { + // LOG_WARNING(..., "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); + if (info.type == TextureType::ColorArrayCube) { + // LOG_WARNING(..., "textureGrad does not support ColorArrayCube. Stubbing"); + ctx.AddF32("{}=0.0f;", inst); + return; + } + const auto d_cast{info.type == TextureType::ColorArray2D ? 
"vec2" : "vec3"}; + ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, + d_cast, d_cast); + return; + } if (!offset.IsEmpty()) { const auto offset_str{GetOffsetVec(ctx, offset)}; if (info.type == TextureType::ColorArrayCube) { ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc, offset_str); } else { - ctx.AddF32("{}=textureLodOffset({},vec3({},{}),{},{});", inst, texture, coords, dref, - lod_lc, offset_str); + ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords, + dref, lod_lc, offset_str); } } else { if (info.type == TextureType::ColorArrayCube) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 420117132..3bbd5a531 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -86,6 +86,7 @@ struct Profile { bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_vertex_viewport_layer{}; + bool support_gl_texture_shadow_lod{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 77681594a..b4c634d29 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -226,6 +226,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), + .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 970fc39d986c5eefa1c4b61ac89ef7e8c2bf23bf Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 7 Jun 2021 19:05:11 -0400 
Subject: glsl: Rebase fixes --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - src/video_core/renderer_opengl/gl_shader_util.cpp | 7 +++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b4c634d29..3d229a78c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -225,7 +225,6 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_derivative_control = device.HasDerivativeControl(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), - .support_gl_vertex_viewport_layer = device.HasVertexViewportLayer(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 99cb81819..ac6f33e34 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,7 +13,7 @@ namespace OpenGL { -static void LogShader(GLuint shader) { +static void LogShader(GLuint shader, std::optional<std::string_view> code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); if (shader_status == GL_FALSE) { @@ -28,6 +28,9 @@ static void LogShader(GLuint shader) { glGetShaderInfoLog(shader, log_length, nullptr, log.data()); if (shader_status == GL_FALSE) { LOG_ERROR(Render_OpenGL, "{}", log); + if (code.has_value()) { + LOG_INFO(Render_OpenGL, "\n{}", *code); + } } else { LOG_WARNING(Render_OpenGL, "{}", log); } @@ -43,7 +46,7 @@ void AttachShader(GLenum stage, GLuint program, std::string_view code) { glCompileShader(shader.handle); glAttachShader(program, 
shader.handle); if (Settings::values.renderer_debug) { - LogShader(shader.handle); + LogShader(shader.handle, code); } } -- cgit v1.2.3 From 8bb8bbf4ae2ef259857efe49436dfd71758ea092 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 8 Jun 2021 01:55:12 -0400 Subject: glsl: Implement fswzadd and wip nv thread shuffle impl --- .../backend/glsl/emit_context.cpp | 11 +++++++ src/shader_recompiler/backend/glsl/emit_context.h | 1 + .../backend/glsl/emit_glsl_warp.cpp | 36 +++++++++++++++++++--- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 5 files changed, 45 insertions(+), 5 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index ed0955da0..6c2828644 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -306,6 +306,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile SetupImages(bindings); SetupTextures(bindings); DefineHelperFunctions(); + DefineConstants(); } void EmitContext::SetupExtensions(std::string&) { @@ -339,6 +340,9 @@ void EmitContext::SetupExtensions(std::string&) { if (!info.uses_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } + if (profile.support_gl_warp_intrinsics) { + header += "#extension GL_NV_shader_thread_shuffle : enable\n"; + } } if (info.stores_viewport_index && profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { @@ -605,4 +609,11 @@ void EmitContext::SetupTextures(Bindings& bindings) { } } +void EmitContext::DefineConstants() { + if (info.uses_fswzadd) { + header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);" + "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);"; + } +} + } // namespace Shader::Backend::GLSL diff --git 
a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index dce99586e..2b0d22ce5 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -167,6 +167,7 @@ private: void DefineStorageBuffers(Bindings& bindings); void DefineGenericOutput(size_t index, u32 invocations); void DefineHelperFunctions(); + void DefineConstants(); std::string DefineGlobalMemoryFunctions(); void SetupImages(Bindings& bindings); void SetupTextures(Bindings& bindings); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 38c49b164..6ced0776c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -35,9 +35,17 @@ std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); } + +void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, + std::string_view value, std::string_view index, + [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) { + const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)}; + ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); + SetInBoundsFlag(ctx, inst); +} } // namespace -void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { +void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); } @@ -103,6 +111,10 @@ void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + 
UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); + return; + } const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; const auto thread_id{"gl_SubGroupInvocationARB"}; const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; @@ -117,6 +129,10 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); + return; + } const auto thread_id{"gl_SubGroupInvocationARB"}; const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; @@ -128,6 +144,10 @@ void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); + return; + } const auto thread_id{"gl_SubGroupInvocationARB"}; const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; @@ -139,6 +159,10 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, std::string_view clamp, std::string_view segmentation_mask) { + if (ctx.profile.support_gl_warp_intrinsics) { + UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); + return; + } const auto thread_id{"gl_SubGroupInvocationARB"}; const auto 
max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; @@ -147,10 +171,12 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); } -void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] std::string_view op_a, [[maybe_unused]] std::string_view op_b, - [[maybe_unused]] std::string_view swizzle) { - NotImplemented(); +void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, + std::string_view swizzle) { + const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)}; + const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask); + const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask); + ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b); } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 3a4495070..246995190 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -86,6 +86,7 @@ struct Profile { bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; + bool support_gl_warp_intrinsics{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3d229a78c..4fcf4e458 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -226,6 +226,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = 
device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), + .support_gl_warp_intrinsics = false, .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 6650c4799d42044f087a1ac5cb5e4b1a9e899000 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 8 Jun 2021 23:52:28 -0400 Subject: gl_rasterizer: Add texture fetch barrier for fragments Fixes flicker seen in XC2 --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 54696d97d..7513bd071 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -437,7 +437,7 @@ void RasterizerOpenGL::WaitForIdle() { } void RasterizerOpenGL::FragmentBarrier() { - glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT); + glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); } void RasterizerOpenGL::TiledCacheBarrier() { -- cgit v1.2.3 From e81c73a8748ccfcde56acfee5630116c3950e479 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 11 Jun 2021 02:50:30 -0400 Subject: glsl: Address more feedback. 
Implement indexed texture reads --- .../backend/glsl/emit_context.cpp | 48 +++----- src/shader_recompiler/backend/glsl/emit_context.h | 13 +- .../backend/glsl/emit_glsl_image.cpp | 136 ++++++++++----------- .../backend/glsl/emit_glsl_select.cpp | 4 +- .../backend/glsl/emit_glsl_shared_memory.cpp | 19 ++- src/video_core/renderer_opengl/gl_shader_util.cpp | 6 +- 6 files changed, 112 insertions(+), 114 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index cbcf0a1eb..ed10eca8a 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -559,53 +559,45 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { } void EmitContext::SetupImages(Bindings& bindings) { - image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); + image_buffers.reserve(info.image_buffer_descriptors.size()); for (const auto& desc : info.image_buffer_descriptors) { - image_buffer_bindings.push_back(bindings.image); - const auto indices{bindings.image + desc.count}; + image_buffers.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; - for (u32 index = bindings.image; index < indices; ++index) { - header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{};", - bindings.image, format, index); - } + const auto array_decorator{desc.count > 1 ? 
fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image, + format, bindings.image, array_decorator); bindings.image += desc.count; } - image_bindings.reserve(info.image_descriptors.size()); + images.reserve(info.image_descriptors.size()); for (const auto& desc : info.image_descriptors) { - image_bindings.push_back(bindings.image); + images.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; const auto image_type{ImageType(desc.type)}; const auto qualifier{desc.is_written ? "" : "readonly "}; - const auto indices{bindings.image + desc.count}; - for (u32 index = bindings.image; index < indices; ++index) { - header += fmt::format("layout(binding={}{})uniform {}{} img{};", bindings.image, format, - qualifier, image_type, index); - } + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, + qualifier, image_type, bindings.image, array_decorator); bindings.image += desc.count; } } void EmitContext::SetupTextures(Bindings& bindings) { - texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); + texture_buffers.reserve(info.texture_buffer_descriptors.size()); for (const auto& desc : info.texture_buffer_descriptors) { - texture_buffer_bindings.push_back(bindings.texture); + texture_buffers.push_back({bindings.texture, desc.count}); const auto sampler_type{SamplerType(TextureType::Buffer, false)}; - const auto indices{bindings.texture + desc.count}; - for (u32 index = bindings.texture; index < indices; ++index) { - header += fmt::format("layout(binding={}) uniform {} tex{};", bindings.texture, - sampler_type, index); - } + const auto array_decorator{desc.count > 1 ? 
fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); bindings.texture += desc.count; } - texture_bindings.reserve(info.texture_descriptors.size()); + textures.reserve(info.texture_descriptors.size()); for (const auto& desc : info.texture_descriptors) { + textures.push_back({bindings.texture, desc.count}); const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; - texture_bindings.push_back(bindings.texture); - const auto indices{bindings.texture + desc.count}; - for (u32 index = bindings.texture; index < indices; ++index) { - header += fmt::format("layout(binding={}) uniform {} tex{};", bindings.texture, - sampler_type, index); - } + const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; + header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, + sampler_type, bindings.texture, array_decorator); bindings.texture += desc.count; } } diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 9d8be0c9a..685f56089 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -36,6 +36,11 @@ struct GenericElementInfo { u32 num_components{}; }; +struct TextureImageDefinition { + u32 binding; + u32 count; +}; + class EmitContext { public: explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, @@ -142,10 +147,10 @@ public: std::string_view stage_name = "invalid"; std::string_view position_name = "gl_Position"; - std::vector<u32> texture_buffer_bindings; - std::vector<u32> image_buffer_bindings; - std::vector<u32> texture_bindings; - std::vector<u32> image_bindings; + std::vector<TextureImageDefinition> texture_buffers; + std::vector<TextureImageDefinition> image_buffers; + std::vector<TextureImageDefinition> textures; + std::vector<TextureImageDefinition> images; std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; bool uses_y_direction{}; diff --git 
a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 00fe288e2..6a98f7ac2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -12,20 +12,18 @@ namespace Shader::Backend::GLSL { namespace { -std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info) { - if (info.type == TextureType::Buffer) { - return fmt::format("tex{}", ctx.texture_buffer_bindings.at(info.descriptor_index)); - } else { - return fmt::format("tex{}", ctx.texture_bindings.at(info.descriptor_index)); - } +std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index) + : ctx.textures.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("tex{}{}", def.binding, index_offset); } -std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info) { - if (info.type == TextureType::Buffer) { - return fmt::format("img{}", ctx.image_buffer_bindings.at(info.descriptor_index)); - } else { - return fmt::format("img{}", ctx.image_bindings.at(info.descriptor_index)); - } +std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { + const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index) + : ctx.images.at(info.descriptor_index)}; + const auto index_offset{def.count > 1 ? 
fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; + return fmt::format("img{}{}", def.binding, index_offset); } std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { @@ -137,14 +135,14 @@ IR::Inst* PrepareSparse(IR::Inst& inst) { } } // Anonymous namespace -void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view bias_lc, const IR::Value& offset) { +void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view bias_lc, + const IR::Value& offset) { const auto info{inst.Flags()}; if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; @@ -175,9 +173,9 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view lod_lc, const IR::Value& offset) { +void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view lod_lc, + const IR::Value& offset) { const auto info{inst.Flags()}; if (info.has_bias) { throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples"); @@ -185,7 +183,7 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; 
const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -208,8 +206,7 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, +void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view dref, std::string_view bias_lc, const IR::Value& offset) { const auto info{inst.Flags()}; @@ -223,7 +220,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; @@ -263,8 +260,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, +void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view dref, std::string_view lod_lc, const IR::Value& offset) { const auto info{inst.Flags()}; @@ -278,7 +274,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, if (info.has_lod_clamp) { throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; const auto cast{needs_shadow_ext ? 
"vec4" : "vec3"}; @@ -313,10 +309,10 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, } } -void EmitImageGather(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -355,11 +351,11 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR info.gather_component); } -void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& offset, const IR::Value& offset2, std::string_view dref) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; if (!sparse_inst) { @@ -395,7 +391,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] cons *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel); } -void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view offset, std::string_view lod, [[maybe_unused]] std::string_view ms) { const auto info{inst.Flags()}; @@ -405,7 +401,7 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR: if (info.has_lod_clamp) { throw NotImplementedException("EmitImageFetch Lod clamp samples"); } - const auto 
texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto sparse_inst{PrepareSparse(inst)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; if (!sparse_inst) { @@ -433,10 +429,10 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR: } } -void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view lod) { +void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view lod) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; switch (info.type) { case TextureType::Color1D: return ctx.AddU32x4( @@ -460,14 +456,14 @@ void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, throw LogicError("Unspecified image type {}", info.type.Value()); } -void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords) { const auto info{inst.Flags()}; - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords); } -void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, const IR::Value& derivatives, const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) { const auto info{inst.Flags()}; @@ -481,7 +477,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const if (!offset.IsEmpty()) { throw NotImplementedException("EmitImageGradient offset"); } - const auto texture{Texture(ctx, info)}; + const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, 
GlslVarType::F32x4)}; const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; @@ -494,65 +490,60 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const } } -void EmitImageRead(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords) { const auto info{inst.Flags()}; const auto sparse_inst{PrepareSparse(inst)}; if (sparse_inst) { throw NotImplementedException("EmitImageRead Sparse"); } - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, TexelFetchCastToInt(coords, info)); } -void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view color) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.Add("imageStore({},{},{});", image, TexelFetchCastToInt(coords, info), color); } -void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view 
coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, TexelFetchCastToInt(coords, info), value); } @@ -567,35 +558,34 @@ void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string NotImplemented(); } -void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, 
[[maybe_unused]] const IR::Value& index, +void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& index, +void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } -void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, - [[maybe_unused]] const IR::Value& index, std::string_view coords, - std::string_view value) { +void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, + std::string_view coords, std::string_view value) { const auto info{inst.Flags()}; - const auto image{Image(ctx, info)}; + const auto image{Image(ctx, info, index)}; ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, TexelFetchCastToInt(coords, info), value); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp index 7aa6096e6..49fba9073 100644 --- 
a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp @@ -28,12 +28,12 @@ void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::stri void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, std::string_view true_value, std::string_view false_value) { - ctx.AddU32("{}={}?uint({}):uint({});", inst, cond, true_value, false_value); + ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); } void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, std::string_view true_value, std::string_view false_value) { - ctx.AddU64("{}={}?uint64_t({}):uint64_t({});", inst, cond, true_value, false_value); + ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value); } void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp index 7abc6575f..8a13bf617 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -9,6 +9,17 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { +constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " + "cas_result=atomicCompSwap({},old_value,bitfieldInsert({},{},{},{}));" + "if(cas_result==old_value){{break;}}}}"}; + +void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value, + std::string_view bit_offset, u32 num_bits) { + const auto smem{fmt::format("smem[{}>>2]", offset)}; + ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits); +} +} // Anonymous namespace void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); } @@ -39,13 +50,13 @@ void 
EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offse } void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { - ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int({}%4)*8,8);", offset, offset, value, - offset); + const auto bit_offset{fmt::format("int({}%4)*8", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 8); } void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { - ctx.Add("smem[{}>>2]=bitfieldInsert(smem[{}>>2],{},int(({}>>1)%2)*16,16);", offset, offset, - value, offset); + const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)}; + SharedWriteCas(ctx, offset, value, bit_offset, 16); } void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index ac6f33e34..5109985f1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,7 +13,7 @@ namespace OpenGL { -static void LogShader(GLuint shader, std::optional code = {}) { +static void LogShader(GLuint shader, std::string_view code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); if (shader_status == GL_FALSE) { @@ -28,8 +28,8 @@ static void LogShader(GLuint shader, std::optional code = {}) glGetShaderInfoLog(shader, log_length, nullptr, log.data()); if (shader_status == GL_FALSE) { LOG_ERROR(Render_OpenGL, "{}", log); - if (code.has_value()) { - LOG_INFO(Render_OpenGL, "\n{}", *code); + if (!code.empty()) { + LOG_INFO(Render_OpenGL, "\n{}", code); } } else { LOG_WARNING(Render_OpenGL, "{}", log); -- cgit v1.2.3 From 413eb6983f07bb4139cd07c5dca22bdb30e6af2d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 02:06:11 -0400 Subject: gl_shader_cache: Move OGL shader compilation to the 
respective Pipeline constructor --- .../renderer_opengl/gl_compute_pipeline.cpp | 13 +++- .../renderer_opengl/gl_compute_pipeline.h | 2 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 67 ++++++++++++++++++--- .../renderer_opengl/gl_graphics_pipeline.h | 4 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 69 +++------------------- 5 files changed, 79 insertions(+), 76 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index a40106c87..f984b635c 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -7,6 +7,7 @@ #include "common/cityhash.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { @@ -39,10 +40,16 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_) + const std::string code) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, - source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { + kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { + if (device.UseAssemblyShaders()) { + assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + } else { + source_program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); + LinkProgram(source_program.handle); + } std::copy_n(info.constant_buffer_used_sizes.begin(), 
uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index b5dfb65e9..a93166eb6 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - OGLProgram source_program_, OGLAssemblyProgram assembly_program_); + const std::string code); void Configure(); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a2ea35d5a..4d62d7062 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -9,6 +9,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/texture_cache/texture_cache.h" @@ -33,6 +34,40 @@ u32 AccumulateCount(const Range& range) { return num; } +GLenum Stage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_SHADER; + case 1: + return GL_TESS_CONTROL_SHADER; + case 2: + return GL_TESS_EVALUATION_SHADER; + case 3: + return GL_GEOMETRY_SHADER; + case 4: + return GL_FRAGMENT_SHADER; + } + UNREACHABLE_MSG("{}", stage_index); + return GL_NONE; +} + +GLenum AssemblyStage(size_t stage_index) { + switch (stage_index) { + case 0: + return GL_VERTEX_PROGRAM_NV; + case 1: + return GL_TESS_CONTROL_PROGRAM_NV; + case 2: + return GL_TESS_EVALUATION_PROGRAM_NV; + case 3: + return GL_GEOMETRY_PROGRAM_NV; + case 4: + return GL_FRAGMENT_PROGRAM_NV; + } + UNREACHABLE_MSG("{}", 
stage_index); + return GL_NONE; +} + /// Translates hardware transform feedback indices /// @param location Hardware location /// @return Pair of ARB_transform_feedback3 token stream first and third arguments @@ -82,19 +117,33 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, + const std::array assembly_sources, + const std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, - gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, - state_tracker{state_tracker_}, program{std::move(program_)}, assembly_programs{std::move( - assembly_programs_)} { + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, + maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - - for (size_t stage = 0; stage < 5; ++stage) { - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + if (device.UseAssemblyShaders()) { + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{assembly_sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 
1 : 0) << stage; + } + } else { + program.handle = glCreateProgram(); + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{glsl_sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } + LinkProgram(program.handle); } u32 num_textures{}; u32 num_images{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 508fad5bb..984bf994f 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -65,8 +65,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - OGLProgram program_, - std::array assembly_programs_, + const std::array assembly_sources, + const std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4fcf4e458..884739aec 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -56,40 +56,6 @@ auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); } -GLenum Stage(size_t stage_index) { - switch (stage_index) { - case 0: - return GL_VERTEX_SHADER; - case 1: - return GL_TESS_CONTROL_SHADER; - case 2: - return GL_TESS_EVALUATION_SHADER; - case 3: - return GL_GEOMETRY_SHADER; - case 4: - return GL_FRAGMENT_SHADER; - } - UNREACHABLE_MSG("{}", stage_index); - return GL_NONE; -} - -GLenum AssemblyStage(size_t stage_index) { - switch (stage_index) { - case 0: - return GL_VERTEX_PROGRAM_NV; - case 1: - return GL_TESS_CONTROL_PROGRAM_NV; - case 2: - return GL_TESS_EVALUATION_PROGRAM_NV; - case 3: - return GL_GEOMETRY_PROGRAM_NV; - case 4: - return GL_FRAGMENT_PROGRAM_NV; - 
} - UNREACHABLE_MSG("{}", stage_index); - return GL_NONE; -} - Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, bool glasm_use_storage_buffers, bool use_assembly_shaders) { @@ -426,12 +392,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( std::array infos{}; OGLProgram source_program; - std::array assembly_programs; + std::array assembly_sources; + std::array glsl_sources; Shader::Backend::Bindings binding; const bool use_glasm{device.UseAssemblyShaders()}; - if (!use_glasm) { - source_program.handle = glCreateProgram(); - } const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { @@ -446,20 +410,14 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { - const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; - assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); + assembly_sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { - const auto code{EmitGLSL(profile, runtime_info, program, binding)}; - AttachShader(Stage(stage_index), source_program.handle, code); + glsl_sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } } - if (!use_glasm) { - LinkProgram(source_program.handle); - } return std::make_unique( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - std::move(source_program), std::move(assembly_programs), infos, - key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + assembly_sources, glsl_sources, infos, key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -496,21 +454,10 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& } Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - - OGLAssemblyProgram asm_program; - OGLProgram source_program; - if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, info, program)}; - asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); - } else { - const auto code{EmitGLSL(profile, program)}; - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); - LinkProgram(source_program.handle); - } + const std::string code{device.UseAssemblyShaders() ? EmitGLASM(profile, info, program) + : EmitGLSL(profile, program)}; return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - kepler_compute, program_manager, program.info, - std::move(source_program), std::move(asm_program)); + kepler_compute, program_manager, program.info, code); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); return nullptr; -- cgit v1.2.3 From ff3de0fb6bb46bcb59421cef203ca8e8daaec85c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 12 Jun 2021 02:11:13 -0400 Subject: gl_shader_cache: Remove const from pipeline source arguments --- src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 2 +- src/video_core/renderer_opengl/gl_compute_pipeline.h | 2 +- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 4 ++-- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index f984b635c..2d6442d74 100644 --- 
a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -40,7 +40,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - const std::string code) + std::string code) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { if (device.UseAssemblyShaders()) { diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index a93166eb6..b5fc45f26 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - const std::string code); + std::string code); void Configure(); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 4d62d7062..d64723d6b 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -117,8 +117,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - const std::array assembly_sources, - const std::array glsl_sources, + std::array assembly_sources, + std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, 
gpu_memory{gpu_memory_}, diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 984bf994f..dc791be53 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -65,8 +65,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - const std::array assembly_sources, - const std::array glsl_sources, + std::array assembly_sources, + std::array glsl_sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); -- cgit v1.2.3 From 5e7b2b9661bf685c3950d7c4065d0d35b488f95c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 13 Jun 2021 00:05:19 -0400 Subject: glsl: Add stubs for sparse queries and variable aoffi when not supported --- .../backend/glsl/emit_context.cpp | 2 +- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 2 +- .../backend/glsl/emit_glsl_image.cpp | 46 ++++++++++++++++------ src/shader_recompiler/profile.h | 2 + src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 + 7 files changed, 47 insertions(+), 13 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index f0e9dffc2..d0880bdcb 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -384,7 +384,7 @@ void EmitContext::SetupExtensions() { profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } - if (info.uses_sparse_residency) { + if (info.uses_sparse_residency && 
profile.support_gl_sparse_textures) { header += "#extension GL_ARB_sparse_texture2 : enable\n"; } if (info.stores_viewport_mask && profile.support_viewport_mask) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index d76b63b2d..6d64913bb 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -215,7 +215,7 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR ctx.header += fmt::format("shared uint smem[{}];", Common::AlignUp(program.shared_memory_size, 4)); } - ctx.header += "\nvoid main(){\n"; + ctx.header += "void main(){\n"; if (program.stage == Stage::VertexA || program.stage == Stage::VertexB) { ctx.header += "gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"; // TODO: Properly resolve attribute issues diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 51181d1c1..c6b3df9c9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -94,7 +94,11 @@ std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { break; } } - const auto offset_str{ctx.var_alloc.Consume(offset)}; + const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi}; + if (!has_var_aoffi) { + // LOG_WARNING("Device does not support variable texture offsets, STUBBING"); + } + const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"}; switch (offset.Type()) { case IR::Type::U32: return fmt::format("int({})", offset_str); @@ -146,7 +150,12 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu const auto bias{info.has_bias ? 
fmt::format(",{}", bias_lc) : ""}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (!offset.IsEmpty()) { const auto offset_str{GetOffsetVec(ctx, offset)}; if (ctx.stage == Stage::Fragment) { @@ -163,7 +172,6 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu } return; } - // TODO: Query sparseTexels extension support if (!offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias); @@ -186,7 +194,12 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (!offset.IsEmpty()) { ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, GetOffsetVec(ctx, offset)); @@ -195,7 +208,6 @@ void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu } return; } - // TODO: Query sparseTexels extension support if (!offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, @@ -315,7 +327,12 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (offset.IsEmpty()) { ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, info.gather_component); @@ -332,7 +349,6 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, info.gather_component); return; } - // TODO: Query sparseTexels extension support if (offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));", *sparse_inst, texture, coords, texel, info.gather_component); @@ -358,7 +374,12 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde const auto texture{Texture(ctx, info, index)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; const auto sparse_inst{PrepareSparse(inst)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (offset.IsEmpty()) { ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); return; @@ -373,7 +394,6 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets); return; } - // TODO: Query sparseTexels extension support if (offset.IsEmpty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst, texture, coords, dref, texel); @@ -404,7 +424,12 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const auto texture{Texture(ctx, info, index)}; const auto sparse_inst{PrepareSparse(inst)}; const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; - if (!sparse_inst) { + const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; + if (sparse_inst && !supports_sparse) { + // LOG_WARNING(..., "Device does not support sparse texture queries. 
STUBBING"); + ctx.AddU1("{}=true;", *sparse_inst); + } + if (!sparse_inst || !supports_sparse) { if (!offset.empty()) { ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); @@ -418,7 +443,6 @@ void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, } return; } - // TODO: Query sparseTexels extension support if (!offset.empty()) { ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", *sparse_inst, texture, CastToIntVec(coords, info), lod, diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 246995190..236c79a0a 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -87,6 +87,8 @@ struct Profile { bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; bool support_gl_warp_intrinsics{}; + bool support_gl_variable_aoffi{}; + bool support_gl_sparse_textures{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 20ea42cff..bf08a6d93 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; + has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ff0ff2b08..0b59c9df0 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -128,6 +128,10 @@ public: return 
has_amd_shader_half_float; } + bool HasSparseTexture2() const { + return has_sparse_texture_2; + } + bool IsWarpSizePotentiallyLargerThanGuest() const { return warp_size_potentially_larger_than_guest; } @@ -165,6 +169,7 @@ private: bool has_depth_buffer_float{}; bool has_nv_gpu_shader_5{}; bool has_amd_shader_half_float{}; + bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 884739aec..3d59d34d7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -193,6 +193,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), .support_gl_warp_intrinsics = false, + .support_gl_variable_aoffi = device.HasVariableAoffi(), + .support_gl_sparse_textures = device.HasSparseTexture2(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From 74f683787eeba7b6e8f5868134f445240733f8fd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 21:06:29 -0400 Subject: gl_shader_cache: Implement async shaders --- src/video_core/CMakeLists.txt | 1 + .../renderer_opengl/gl_graphics_pipeline.cpp | 123 ++++++++++++--------- .../renderer_opengl/gl_graphics_pipeline.h | 14 ++- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 54 +++++---- src/video_core/renderer_opengl/gl_shader_cache.h | 34 ++---- src/video_core/renderer_opengl/gl_shader_context.h | 33 ++++++ 7 files changed, 154 insertions(+), 107 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_shader_context.h (limited to 'src/video_core/renderer_opengl') diff --git 
a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1ef3a6189..007ecc13e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -83,6 +83,7 @@ add_library(video_core STATIC renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h + renderer_opengl/gl_shader_context.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state_tracker.cpp diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index d64723d6b..d27a3cf46 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -6,11 +6,13 @@ #include #include "common/cityhash.h" +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/shader_notify.h" #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { @@ -117,74 +119,91 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - std::array assembly_sources, - std::array glsl_sources, + ShaderWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, + std::array sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } 
std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - if (device.UseAssemblyShaders()) { - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{assembly_sources[stage]}; - if (code.empty()) { - continue; + auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { + if (device.UseAssemblyShaders()) { + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; } - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; - } - } else { - program.handle = glCreateProgram(); - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{glsl_sources[stage]}; - if (code.empty()) { - continue; + } else { + program.handle = glCreateProgram(); + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); } - AttachShader(Stage(stage), program.handle, code); + LinkProgram(program.handle); } - LinkProgram(program.handle); - } - u32 num_textures{}; - u32 num_images{}; - u32 num_storage_buffers{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + if (shader_notify) { + shader_notify->MarkShaderComplete(); } - enabled_uniform_buffer_masks[stage] = 
info.constant_buffer_mask; - std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + const auto& info{stage_infos[stage]}; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += + AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += + AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); - writes_global_memory |= std::ranges::any_of( - 
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); - const bool assembly_shaders{assembly_programs[0].handle != 0}; - use_storage_buffers = - !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - writes_global_memory &= !use_storage_buffers; + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); + if (assembly_shaders && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } + is_built.store(true, std::memory_order_relaxed); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(nullptr); } } diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index dc791be53..58deafd3c 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -20,10 +20,15 @@ namespace OpenGL { +namespace ShaderContext { +struct Context; +} + class Device; class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using ShaderWorker = Common::StatefulThreadWorker; struct GraphicsPipelineKey { std::array unique_hashes; @@ -65,8 +70,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - std::array assembly_sources, - std::array glsl_sources, + ShaderWorker* thread_worker, 
VideoCore::ShaderNotify* shader_notify, + std::array sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); @@ -82,6 +87,10 @@ public: return writes_global_memory; } + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); + } + private: void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); @@ -108,6 +117,7 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; + std::atomic_bool is_built{false}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7513bd071..e3d336f86 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -70,7 +70,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, - buffer_cache, program_manager, state_tracker), + buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3d59d34d7..d082b9f73 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -17,7 +17,6 @@ #include "common/scope_exit.h" #include "common/thread_worker.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/backend/glsl/emit_glsl.h" #include 
"shader_recompiler/backend/spirv/emit_spirv.h" @@ -50,6 +49,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; using VideoCommon::SerializePipeline; +using Context = ShaderContext::Context; template auto MakeSpan(Container& container) { @@ -143,25 +143,17 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs } } // Anonymous namespace -struct ShaderCache::Context { - explicit Context(Core::Frontend::EmuWindow& emu_window) - : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} - - std::unique_ptr gl_context; - Core::Frontend::GraphicsContext::Scoped scoped; - ShaderPools pools; -}; - ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, TextureCache& texture_cache_, BufferCache& buffer_cache_, - ProgramManager& program_manager_, StateTracker& state_tracker_) + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, - use_asynchronous_shaders{device.UseAsynchronousShaders()}, + shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()}, profile{ .supported_spirv = 0x00010000, @@ -264,7 +256,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs))}; + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; std::lock_guard lock{state.mutex}; if (pipeline) { 
graphics_cache.emplace(key, std::move(pipeline)); @@ -311,6 +303,9 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (is_new) { program = CreateGraphicsPipeline(); } + if (!program || !program->IsBuilt()) { + return nullptr; + } return program.get(); } @@ -339,7 +334,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span())}; + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), + use_asynchronous_shaders)}; if (!pipeline || shader_cache_filename.empty()) { return pipeline; } @@ -354,8 +350,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { } std::unique_ptr ShaderCache::CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs) try { + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs, bool build_in_parallel) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{}; u32 total_storage_buffers{}; @@ -394,8 +390,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( std::array infos{}; OGLProgram source_program; - std::array assembly_sources; - std::array glsl_sources; + std::array sources; Shader::Backend::Bindings binding; const bool use_glasm{device.UseAssemblyShaders()}; const size_t first_index = uses_vertex_a && uses_vertex_b ? 
1 : 0; @@ -412,14 +407,16 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { - assembly_sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); + sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { - glsl_sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); + sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } } + auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; return std::make_unique( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - assembly_sources, glsl_sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + thread_worker, notify, sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -442,9 +439,9 @@ std::unique_ptr ShaderCache::CreateComputePipeline( return pipeline; } -std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, - const ComputePipelineKey& key, - Shader::Environment& env) try { +std::unique_ptr ShaderCache::CreateComputePipeline( + ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, + Shader::Environment& env) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -465,11 +462,10 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return nullptr; } -std::unique_ptr> ShaderCache::CreateWorkers() - const { - return std::make_unique>( - std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", - [this] { return Context{emu_window}; }); +std::unique_ptr ShaderCache::CreateWorkers() const { + return 
std::make_unique(std::max(std::thread::hardware_concurrency(), 2U) - 1, + "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index e0c5a06d8..d24b54d90 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -13,13 +13,12 @@ #include "common/common_types.h" #include "common/thread_worker.h" -#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_context.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -31,29 +30,17 @@ namespace OpenGL { class Device; class ProgramManager; class RasterizerOpenGL; - -struct ShaderPools { - void ReleaseContents() { - flow_block.ReleaseContents(); - block.ReleaseContents(); - inst.ReleaseContents(); - } - - Shader::ObjectPool inst; - Shader::ObjectPool block; - Shader::ObjectPool flow_block; -}; +using ShaderWorker = Common::StatefulThreadWorker; class ShaderCache : public VideoCommon::ShaderCache { - struct Context; - public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, TextureCache& texture_cache_, BufferCache& buffer_cache_, - ProgramManager& program_manager_, StateTracker& state_tracker_); + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_); ~ShaderCache(); void LoadDiskResources(u64 title_id, 
std::stop_token stop_loading, @@ -67,17 +54,17 @@ private: std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs); + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs, bool build_in_parallel); std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader); - std::unique_ptr CreateComputePipeline(ShaderPools& pools, + std::unique_ptr CreateComputePipeline(ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env); - std::unique_ptr> CreateWorkers() const; + std::unique_ptr CreateWorkers() const; Core::Frontend::EmuWindow& emu_window; const Device& device; @@ -85,17 +72,18 @@ private: BufferCache& buffer_cache; ProgramManager& program_manager; StateTracker& state_tracker; + VideoCore::ShaderNotify& shader_notify; GraphicsPipelineKey graphics_key{}; const bool use_asynchronous_shaders; - ShaderPools main_pools; + ShaderContext::ShaderPools main_pools; std::unordered_map> graphics_cache; std::unordered_map> compute_cache; Shader::Profile profile; std::filesystem::path shader_cache_filename; - std::unique_ptr> workers; + std::unique_ptr workers; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h new file mode 100644 index 000000000..6ff34e5d6 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_context.h @@ -0,0 +1,33 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "core/frontend/emu_window.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" + +namespace OpenGL::ShaderContext { +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); + } + + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; +}; + +struct Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; +}; + +} // namespace OpenGL::ShaderContext -- cgit v1.2.3 From 6eea88d6149f7122777b325c7fc8549e2a974e64 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 23:02:07 -0400 Subject: glsl: Cleanup/Address feedback --- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 8 ++++---- .../backend/glsl/emit_glsl_atomic.cpp | 12 ++++++------ .../backend/glsl/emit_glsl_composite.cpp | 3 +-- .../backend/glsl/emit_glsl_context_get_set.cpp | 18 +++++------------- .../backend/glsl/emit_glsl_integer.cpp | 1 + .../backend/glsl/emit_glsl_shared_memory.cpp | 1 + src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | 2 +- src/shader_recompiler/backend/glsl/var_alloc.cpp | 3 +-- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 ++ 10 files changed, 24 insertions(+), 28 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 6d64913bb..9f8cf659f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -156,8 +156,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { ctx.Add("for(;;){{"); break; case 
IR::AbstractSyntaxNode::Type::Repeat: - ctx.Add("if({}){{continue;}}else{{break;}}}}", - ctx.var_alloc.Consume(node.data.repeat.cond)); + ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond)); break; default: throw NotImplementedException("AbstractSyntaxNode Type {}", node.type); @@ -166,7 +165,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } std::string GlslVersionSpecifier(const EmitContext& ctx) { - if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings) { + if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings || ctx.info.loads_legacy_varyings) { return " compatibility"; } return ""; @@ -187,7 +186,8 @@ void DefineVariables(const EmitContext& ctx, std::string& header) { const auto type{static_cast(i)}; const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; const auto type_name{ctx.var_alloc.GetGlslType(type)}; - const auto precise{IsPreciseType(type) ? "precise " : ""}; + const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug}; + const auto precise{!has_precise_bug && IsPreciseType(type) ? 
"precise " : ""}; // Temps/return types that are never used are stored at index 0 if (tracker.uses_temp) { header += fmt::format("{}{} t{}={}(0);", precise, type_name, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 9152ace98..772acc5a4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -98,7 +98,7 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, pointer_offset); ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", @@ -171,7 +171,7 @@ void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Val void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -182,7 +182,7 @@ void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, 
"Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -195,7 +195,7 @@ void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -207,7 +207,7 @@ void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); @@ -220,7 +220,7 @@ void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { - LOG_WARNING(Shader_GLSL, "Int64 Atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, ctx.stage_name, 
binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset)); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp index 7421ce97d..49a66e3ec 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp @@ -17,8 +17,7 @@ void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view // The result is aliased with the composite ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); } else { - ctx.Add("{}={};", result, composite); - ctx.Add("{}.{}={};", result, SWIZZLE[index], object); + ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object); } } } // Anonymous namespace diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 0d1e5ed7f..edeecc26e 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -30,7 +30,7 @@ std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; } -std::string OutputVertexIndex(EmitContext& ctx) { +std::string_view OutputVertexIndex(EmitContext& ctx) { return ctx.stage == Stage::TessellationControl ? 
"[gl_InvocationID]" : ""; } @@ -40,7 +40,7 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, const bool is_immediate{offset.IsImmediate()}; if (is_immediate) { const s32 signed_offset{static_cast(offset.U32())}; - static constexpr u32 cbuf_size{4096 * 16}; + static constexpr u32 cbuf_size{0x10000}; if (signed_offset < 0 || offset.U32() > cbuf_size) { LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); ctx.Add("{}=0u;", ret); @@ -140,7 +140,7 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding const IR::Value& offset) { const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; if (offset.IsImmediate()) { - static constexpr u32 cbuf_size{4096 * 16}; + static constexpr u32 cbuf_size{0x10000}; const u32 u32_offset{offset.U32()}; const s32 signed_offset{static_cast(offset.U32())}; if (signed_offset < 0 || u32_offset > cbuf_size) { @@ -308,21 +308,13 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val case IR::Attribute::ColorFrontDiffuseG: case IR::Attribute::ColorFrontDiffuseB: case IR::Attribute::ColorFrontDiffuseA: - if (ctx.stage == Stage::Fragment) { - ctx.Add("gl_Color.{}={};", swizzle, value); - } else { - ctx.Add("gl_FrontColor.{}={};", swizzle, value); - } + ctx.Add("gl_FrontColor.{}={};", swizzle, value); break; case IR::Attribute::ColorFrontSpecularR: case IR::Attribute::ColorFrontSpecularG: case IR::Attribute::ColorFrontSpecularB: case IR::Attribute::ColorFrontSpecularA: - if (ctx.stage == Stage::Fragment) { - ctx.Add("gl_SecondaryColor.{}={};", swizzle, value); - } else { - ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); - } + ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); break; case IR::Attribute::ColorBackDiffuseR: case IR::Attribute::ColorBackDiffuseG: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 
7a2f79d10..983e6d95d 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -28,6 +28,7 @@ void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { sign->Invalidate(); } } // Anonymous namespace + void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp index 8a13bf617..518b78f06 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp @@ -20,6 +20,7 @@ void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view ctx.Add(cas_loop, smem, smem, smem, value, bit_offset, num_bits); } } // Anonymous namespace + void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 7047928fd..4d418cbbc 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -43,7 +43,7 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); SetInBoundsFlag(ctx, inst); } -} // namespace +} // Anonymous namespace void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp index cbf56491c..194f926ca 
100644 --- a/src/shader_recompiler/backend/glsl/var_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp @@ -177,8 +177,7 @@ Id VarAlloc::Alloc(GlslVarType type) { void VarAlloc::Free(Id id) { if (id.is_valid == 0) { - // throw LogicError("Freeing invalid variable"); - return; + throw LogicError("Freeing invalid variable"); } auto& use_tracker{GetUseTracker(id.type)}; use_tracker.var_use[id.index] = false; diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 236c79a0a..6db794e91 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -105,6 +105,8 @@ struct Profile { bool has_broken_signed_operations{}; /// Dynamic vec4 indexing is broken on some OpenGL drivers bool has_gl_component_indexing_bug{}; + /// The precise type qualifier is broken in the fragment stage of some drivers + bool has_gl_precise_bug{}; /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d082b9f73..5ffe28d45 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -196,6 +196,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, + .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), + .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, } { if (use_asynchronous_shaders) { -- cgit v1.2.3 From 3b339fbbf65a50ec2ec8baacd175ca7577c3b8bd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 23:33:26 -0400 Subject: glsl: Conditionally use fine/coarse derivatives based on device support --- .../backend/glsl/emit_context.cpp | 3 +++ .../backend/glsl/emit_glsl_warp.cpp 
| 28 ++++++++++++++++++---- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 4 files changed, 29 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index e18f8257e..0e8fe017d 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -395,6 +395,9 @@ void EmitContext::SetupExtensions() { if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) { header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; } + if (info.uses_derivatives && profile.support_gl_derivative_control) { + header += "#extension GL_ARB_derivative_control : enable\n"; + } } void EmitContext::DefineConstantBuffers(Bindings& bindings) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 4d418cbbc..a982dd8a2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -180,18 +180,38 @@ void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, st } void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdxFine({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } } void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdyFine({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyFine({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } } 
void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx"); + ctx.AddF32("{}=dFdx({});", inst, op_a); + } } void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { - ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); + if (ctx.profile.support_gl_derivative_control) { + ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); + } else { + LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy"); + ctx.AddF32("{}=dFdy({});", inst, op_a); + } } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 6db794e91..e8cfc03af 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -89,6 +89,7 @@ struct Profile { bool support_gl_warp_intrinsics{}; bool support_gl_variable_aoffi{}; bool support_gl_sparse_textures{}; + bool support_gl_derivative_control{}; bool warp_size_potentially_larger_than_guest{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5ffe28d45..fedbce2f0 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -187,6 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_gl_warp_intrinsics = false, .support_gl_variable_aoffi = device.HasVariableAoffi(), .support_gl_sparse_textures = device.HasSparseTexture2(), + .support_gl_derivative_control = device.HasDerivativeControl(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), -- cgit v1.2.3 From d36f667bc0adaa9f50d53efb4c908aadc38921a6 Mon Sep 17 00:00:00 2001 From: ameerj 
<52414509+ameerj@users.noreply.github.com> Date: Tue, 15 Jun 2021 17:23:57 -0400 Subject: glsl: Address rest of feedback --- .../backend/glsl/emit_context.cpp | 44 +++++++++++++++++----- src/shader_recompiler/backend/glsl/emit_context.h | 2 + .../backend/glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_special.cpp | 22 +++++++---- src/shader_recompiler/ir_opt/texture_pass.cpp | 11 +++++- src/shader_recompiler/profile.h | 2 + src/shader_recompiler/shader_info.h | 2 + src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++ .../renderer_opengl/gl_graphics_pipeline.cpp | 32 ++++++++-------- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 11 files changed, 86 insertions(+), 38 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 0e8fe017d..d224c4d84 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -148,6 +148,16 @@ std::string_view ImageFormatString(ImageFormat format) { } } +std::string_view ImageAccessQualifier(bool is_written, bool is_read) { + if (is_written && !is_read) { + return "writeonly "; + } + if (is_read && !is_written) { + return "readonly "; + } + return ""; +} + std::string_view GetTessMode(TessPrimitive primitive) { switch (primitive) { case TessPrimitive::Triangles: @@ -262,7 +272,9 @@ void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, const RuntimeInfo& runtime_info_) : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { - header += "#pragma optionNV(fastmath off)\n"; + if (profile.need_fastmath_off) { + header += "#pragma optionNV(fastmath off)\n"; + } SetupExtensions(); stage = program.stage; switch (program.stage) { @@ -335,7 +347,7 @@ 
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } for (size_t index = 0; index < info.stores_generics.size(); ++index) { // TODO: Properly resolve attribute issues - if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { + if (info.stores_generics[index] || StageInitializesVaryings()) { DefineGenericOutput(index, program.invocations); } } @@ -347,6 +359,17 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineConstants(); } +bool EmitContext::StageInitializesVaryings() const noexcept { + switch (stage) { + case Stage::VertexA: + case Stage::VertexB: + case Stage::Geometry: + return true; + default: + return false; + } +} + void EmitContext::SetupExtensions() { if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; @@ -361,7 +384,7 @@ void EmitContext::SetupExtensions() { header += "#extension GL_NV_shader_atomic_float : enable\n"; } if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { - header += "#extension NV_shader_atomic_fp16_vector : enable\n"; + header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; } if (info.uses_fp16) { if (profile.support_gl_nv_gpu_shader_5) { @@ -392,7 +415,7 @@ void EmitContext::SetupExtensions() { if (info.stores_viewport_mask && profile.support_viewport_mask) { header += "#extension GL_NV_viewport_array2 : enable\n"; } - if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) { + if (info.uses_typeless_image_reads) { header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; } if (info.uses_derivatives && profile.support_gl_derivative_control) { @@ -593,9 +616,9 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); } - write_func += "}"; - write_func_64 += 
"}"; - write_func_128 += "}"; + write_func += '}'; + write_func_64 += '}'; + write_func_128 += '}'; load_func += "return 0u;}"; load_func_64 += "return uvec2(0);}"; load_func_128 += "return uvec4(0);}"; @@ -607,9 +630,10 @@ void EmitContext::SetupImages(Bindings& bindings) { for (const auto& desc : info.image_buffer_descriptors) { image_buffers.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; - header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image, - format, bindings.image, array_decorator); + header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", + bindings.image, format, qualifier, bindings.image, array_decorator); bindings.image += desc.count; } images.reserve(info.image_descriptors.size()); @@ -617,7 +641,7 @@ void EmitContext::SetupImages(Bindings& bindings) { images.push_back({bindings.image, desc.count}); const auto format{ImageFormatString(desc.format)}; const auto image_type{ImageType(desc.type)}; - const auto qualifier{desc.is_written ? "" : "readonly "}; + const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; const auto array_decorator{desc.count > 1 ? 
fmt::format("[{}]", desc.count) : ""}; header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, qualifier, image_type, bindings.image, array_decorator); diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 8fa87c02c..4a50556e1 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -136,6 +136,8 @@ public: code += '\n'; } + [[nodiscard]] bool StageInitializesVaryings() const noexcept; + std::string header; std::string code; VarAlloc var_alloc; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index edeecc26e..a241d18fe 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -329,7 +329,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); break; case IR::Attribute::FogCoordinate: - ctx.Add("gl_FogFragCoord.x={};", value); + ctx.Add("gl_FogFragCoord={};", value); break; case IR::Attribute::ClipDistance0: case IR::Attribute::ClipDistance1: diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index cfef58d79..59ca52f07 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -10,6 +10,17 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { +void InitializeVaryings(EmitContext& ctx) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + // TODO: Properly resolve attribute issues + for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { + if (!ctx.info.stores_generics[index]) { + ctx.Add("out_attr{}=vec4(0,0,0,1);", 
index); + } + } +} +} // Anonymous namespace void EmitPhi(EmitContext& ctx, IR::Inst& phi) { const size_t num_args{phi.NumArgs()}; @@ -44,14 +55,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& } void EmitPrologue(EmitContext& ctx) { - if (ctx.stage == Stage::VertexA || ctx.stage == Stage::VertexB) { - ctx.Add("gl_Position=vec4(0.0f, 0.0f, 0.0f, 1.0f);"); - // TODO: Properly resolve attribute issues - for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { - if (!ctx.info.stores_generics[index]) { - ctx.Add("out_attr{}=vec4(0,0,0,1);", index); - } - } + if (ctx.StageInitializesVaryings()) { + InitializeVaryings(ctx); } } @@ -59,6 +64,7 @@ void EmitEpilogue(EmitContext&) {} void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); + InitializeVaryings(ctx); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index e9098239d..737f186ab 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp @@ -312,11 +312,14 @@ public: } u32 Add(const ImageBufferDescriptor& desc) { - return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { + const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && desc.size_shift == existing.size_shift; - }); + })}; + image_buffer_descriptors[index].is_written |= desc.is_written; + image_buffer_descriptors[index].is_read |= desc.is_read; + return index; } u32 Add(const TextureDescriptor& desc) { @@ -339,6 +342,7 @@ public: desc.size_shift == existing.size_shift; })}; image_descriptors[index].is_written |= desc.is_written; + 
image_descriptors[index].is_read |= desc.is_read; return index; } @@ -430,10 +434,12 @@ void TexturePass(Environment& env, IR::Program& program) { throw NotImplementedException("Unexpected separate sampler"); } const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; + const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead}; if (flags.type == TextureType::Buffer) { index = descriptors.Add(ImageBufferDescriptor{ .format = flags.image_format, .is_written = is_written, + .is_read = is_read, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, @@ -444,6 +450,7 @@ void TexturePass(Environment& env, IR::Program& program) { .type = flags.type, .format = flags.image_format, .is_written = is_written, + .is_read = is_read, .cbuf_index = cbuf.index, .cbuf_offset = cbuf.offset, .count = cbuf.count, diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index e8cfc03af..a3c412a0f 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -97,6 +97,8 @@ struct Profile { /// Fragment outputs have to be declared even if they are not written to avoid undefined values. /// See Ori and the Blind Forest's main menu for reference. 
bool need_declared_frag_colors{}; + /// Prevents fast math optimizations that may cause inaccuracies + bool need_fastmath_off{}; /// OpFClamp is broken and OpFMax + OpFMin should be used instead bool has_broken_spirv_clamp{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 74d7a6a94..e9ebc16a4 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -75,6 +75,7 @@ using TextureBufferDescriptors = boost::container::small_vectorMarkShaderComplete(); + if (!device.UseAssemblyShaders()) { + LinkProgram(program.handle); } u32 num_textures{}; u32 num_images{}; @@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (assembly_shaders && xfb_state) { GenerateTransformFeedbackState(*xfb_state); } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } is_built.store(true, std::memory_order_relaxed); }}; if (thread_worker) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index fedbce2f0..620666622 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .lower_left_origin_mode = true, .need_declared_frag_colors = true, + .need_fastmath_off = device.NeedsFastmathOff(), .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, -- cgit v1.2.3 From 376aa94819b7da976adb120136d83980a757d044 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 01:49:19 -0300 Subject: shader: Rename maxwell/program.h to translate_program.h --- src/shader_recompiler/CMakeLists.txt | 4 +- src/shader_recompiler/frontend/maxwell/program.cpp | 203 --------------------- src/shader_recompiler/frontend/maxwell/program.h | 27 --- .../frontend/maxwell/translate_program.cpp | 203 +++++++++++++++++++++ 
.../frontend/maxwell/translate_program.h | 22 +++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 7 files changed, 229 insertions(+), 234 deletions(-) delete mode 100644 src/shader_recompiler/frontend/maxwell/program.cpp delete mode 100644 src/shader_recompiler/frontend/maxwell/program.h create mode 100644 src/shader_recompiler/frontend/maxwell/translate_program.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate_program.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 06ee50fff..f801a9f72 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -114,8 +114,6 @@ add_library(shader_recompiler STATIC frontend/maxwell/maxwell.inc frontend/maxwell/opcodes.cpp frontend/maxwell/opcodes.h - frontend/maxwell/program.cpp - frontend/maxwell/program.h frontend/maxwell/structured_control_flow.cpp frontend/maxwell/structured_control_flow.h frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp @@ -211,6 +209,8 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/warp_shuffle.cpp frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.h + frontend/maxwell/translate_program.cpp + frontend/maxwell/translate_program.h ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp deleted file mode 100644 index 8489f9a5f..000000000 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include -#include -#include -#include - -#include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/ir/basic_block.h" -#include "shader_recompiler/frontend/ir/post_order.h" -#include "shader_recompiler/frontend/maxwell/program.h" -#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" -#include "shader_recompiler/frontend/maxwell/translate/translate.h" -#include "shader_recompiler/ir_opt/passes.h" - -namespace Shader::Maxwell { -namespace { -IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { - auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { - return node.type == IR::AbstractSyntaxNode::Type::Block; - })}; - IR::BlockList blocks(std::ranges::distance(syntax_blocks)); - std::ranges::transform(syntax_blocks, blocks.begin(), - [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); - return blocks; -} - -void RemoveUnreachableBlocks(IR::Program& program) { - // Some blocks might be unreachable if a function call exists unconditionally - // If this happens the number of blocks and post order blocks will mismatch - if (program.blocks.size() == program.post_order_blocks.size()) { - return; - } - const auto begin{program.blocks.begin() + 1}; - const auto end{program.blocks.end()}; - const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; - program.blocks.erase(std::remove_if(begin, end, pred), end); -} - -void CollectInterpolationInfo(Environment& env, IR::Program& program) { - if (program.stage != Stage::Fragment) { - return; - } - const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { - std::optional imap; - for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { - if (value == PixelImap::Unused) { - continue; - } - if (imap && imap != value) { - throw NotImplementedException("Per component interpolation"); - } - imap = value; - } - if (!imap) { - continue; - } 
- program.info.input_generics[index].interpolation = [&] { - switch (*imap) { - case PixelImap::Unused: - case PixelImap::Perspective: - return Interpolation::Smooth; - case PixelImap::Constant: - return Interpolation::Flat; - case PixelImap::ScreenLinear: - return Interpolation::NoPerspective; - } - throw NotImplementedException("Unknown interpolation {}", *imap); - }(); - } -} - -void AddNVNStorageBuffers(IR::Program& program) { - if (!program.info.uses_global_memory) { - return; - } - const u32 driver_cbuf{0}; - const u32 descriptor_size{0x10}; - const u32 num_buffers{16}; - const u32 base{[&] { - switch (program.stage) { - case Stage::VertexA: - case Stage::VertexB: - return 0x110u; - case Stage::TessellationControl: - return 0x210u; - case Stage::TessellationEval: - return 0x310u; - case Stage::Geometry: - return 0x410u; - case Stage::Fragment: - return 0x510u; - case Stage::Compute: - return 0x310u; - } - throw InvalidArgument("Invalid stage {}", program.stage); - }()}; - auto& descs{program.info.storage_buffers_descriptors}; - for (u32 index = 0; index < num_buffers; ++index) { - if (!program.info.nvn_buffer_used[index]) { - continue; - } - const u32 offset{base + index * descriptor_size}; - const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; - if (it != descs.end()) { - it->is_written |= program.info.stores_global_memory; - continue; - } - descs.push_back({ - .cbuf_index = driver_cbuf, - .cbuf_offset = offset, - .count = 1, - .is_written = program.info.stores_global_memory, - }); - } -} -} // Anonymous namespace - -IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { - IR::Program program; - program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); - program.blocks = GenerateBlocks(program.syntax_list); - program.post_order_blocks = PostOrder(program.syntax_list.front()); - program.stage = env.ShaderStage(); - program.local_memory_size = 
env.LocalMemorySize(); - switch (program.stage) { - case Stage::TessellationControl: { - const ProgramHeader& sph{env.SPH()}; - program.invocations = sph.common2.threads_per_input_primitive; - break; - } - case Stage::Geometry: { - const ProgramHeader& sph{env.SPH()}; - program.output_topology = sph.common3.output_topology; - program.output_vertices = sph.common4.max_output_vertices; - program.invocations = sph.common2.threads_per_input_primitive; - break; - } - case Stage::Compute: - program.workgroup_size = env.WorkgroupSize(); - program.shared_memory_size = env.SharedMemorySize(); - break; - default: - break; - } - RemoveUnreachableBlocks(program); - - // Replace instructions before the SSA rewrite - Optimization::LowerFp16ToFp32(program); - - Optimization::SsaRewritePass(program); - - Optimization::GlobalMemoryToStorageBufferPass(program); - Optimization::TexturePass(env, program); - - Optimization::ConstantPropagationPass(program); - Optimization::DeadCodeEliminationPass(program); - Optimization::VerificationPass(program); - Optimization::CollectShaderInfoPass(env, program); - CollectInterpolationInfo(env, program); - AddNVNStorageBuffers(program); - return program; -} - -IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, - Environment& env_vertex_b) { - IR::Program result{}; - Optimization::VertexATransformPass(vertex_a); - Optimization::VertexBTransformPass(vertex_b); - for (const auto& term : vertex_a.syntax_list) { - if (term.type == IR::AbstractSyntaxNode::Type::Return) { - continue; - } - result.syntax_list.push_back(term); - } - for (const auto& term : vertex_b.syntax_list) { - result.syntax_list.push_back(term); - } - result.blocks = GenerateBlocks(result.syntax_list); - result.post_order_blocks = vertex_b.post_order_blocks; - for (const auto& block : vertex_a.post_order_blocks) { - result.post_order_blocks.push_back(block); - } - result.stage = Stage::VertexB; - result.info = vertex_a.info; - result.local_memory_size = 
std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; ++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; - } - Optimization::JoinTextureInfo(result.info, vertex_b.info); - Optimization::JoinStorageInfo(result.info, vertex_b.info); - Optimization::DeadCodeEliminationPass(result); - Optimization::VerificationPass(result); - Optimization::CollectShaderInfoPass(env_vertex_b, result); - return result; -} - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/program.h b/src/shader_recompiler/frontend/maxwell/program.h deleted file mode 100644 index f7f5930e4..000000000 --- a/src/shader_recompiler/frontend/maxwell/program.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include - -#include "shader_recompiler/environment.h" -#include "shader_recompiler/frontend/ir/program.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/object_pool.h" - -namespace Shader::Maxwell { - -[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, - ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); - -[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, - Environment& env_vertex_b); - -} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp new file mode 100644 index 000000000..e52170e3e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -0,0 +1,203 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/post_order.h" +#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" +#include "shader_recompiler/frontend/maxwell/translate/translate.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" +#include "shader_recompiler/ir_opt/passes.h" + +namespace Shader::Maxwell { +namespace { +IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { + auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) { + return node.type == IR::AbstractSyntaxNode::Type::Block; + })}; + IR::BlockList blocks(std::ranges::distance(syntax_blocks)); + std::ranges::transform(syntax_blocks, blocks.begin(), + [](const IR::AbstractSyntaxNode& node) { return node.data.block; }); + return blocks; +} + +void RemoveUnreachableBlocks(IR::Program& program) { + // Some blocks might be unreachable if a function call exists unconditionally + // If this happens the number of blocks and post order blocks will mismatch + if (program.blocks.size() == program.post_order_blocks.size()) { + return; + } + const auto begin{program.blocks.begin() + 1}; + const auto end{program.blocks.end()}; + const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; + program.blocks.erase(std::remove_if(begin, end, pred), end); +} + +void CollectInterpolationInfo(Environment& env, IR::Program& program) { + if (program.stage != Stage::Fragment) { + return; + } + const ProgramHeader& sph{env.SPH()}; + for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + std::optional imap; + for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { + if (value == PixelImap::Unused) { + continue; + } + if (imap && imap != value) { + throw NotImplementedException("Per component interpolation"); + } + imap = value; + } + if (!imap) { + 
continue; + } + program.info.input_generics[index].interpolation = [&] { + switch (*imap) { + case PixelImap::Unused: + case PixelImap::Perspective: + return Interpolation::Smooth; + case PixelImap::Constant: + return Interpolation::Flat; + case PixelImap::ScreenLinear: + return Interpolation::NoPerspective; + } + throw NotImplementedException("Unknown interpolation {}", *imap); + }(); + } +} + +void AddNVNStorageBuffers(IR::Program& program) { + if (!program.info.uses_global_memory) { + return; + } + const u32 driver_cbuf{0}; + const u32 descriptor_size{0x10}; + const u32 num_buffers{16}; + const u32 base{[&] { + switch (program.stage) { + case Stage::VertexA: + case Stage::VertexB: + return 0x110u; + case Stage::TessellationControl: + return 0x210u; + case Stage::TessellationEval: + return 0x310u; + case Stage::Geometry: + return 0x410u; + case Stage::Fragment: + return 0x510u; + case Stage::Compute: + return 0x310u; + } + throw InvalidArgument("Invalid stage {}", program.stage); + }()}; + auto& descs{program.info.storage_buffers_descriptors}; + for (u32 index = 0; index < num_buffers; ++index) { + if (!program.info.nvn_buffer_used[index]) { + continue; + } + const u32 offset{base + index * descriptor_size}; + const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; + if (it != descs.end()) { + it->is_written |= program.info.stores_global_memory; + continue; + } + descs.push_back({ + .cbuf_index = driver_cbuf, + .cbuf_offset = offset, + .count = 1, + .is_written = program.info.stores_global_memory, + }); + } +} +} // Anonymous namespace + +IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, + Environment& env, Flow::CFG& cfg) { + IR::Program program; + program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); + program.blocks = GenerateBlocks(program.syntax_list); + program.post_order_blocks = PostOrder(program.syntax_list.front()); + program.stage = env.ShaderStage(); + program.local_memory_size = 
env.LocalMemorySize(); + switch (program.stage) { + case Stage::TessellationControl: { + const ProgramHeader& sph{env.SPH()}; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Geometry: { + const ProgramHeader& sph{env.SPH()}; + program.output_topology = sph.common3.output_topology; + program.output_vertices = sph.common4.max_output_vertices; + program.invocations = sph.common2.threads_per_input_primitive; + break; + } + case Stage::Compute: + program.workgroup_size = env.WorkgroupSize(); + program.shared_memory_size = env.SharedMemorySize(); + break; + default: + break; + } + RemoveUnreachableBlocks(program); + + // Replace instructions before the SSA rewrite + Optimization::LowerFp16ToFp32(program); + + Optimization::SsaRewritePass(program); + + Optimization::GlobalMemoryToStorageBufferPass(program); + Optimization::TexturePass(env, program); + + Optimization::ConstantPropagationPass(program); + Optimization::DeadCodeEliminationPass(program); + Optimization::VerificationPass(program); + Optimization::CollectShaderInfoPass(env, program); + CollectInterpolationInfo(env, program); + AddNVNStorageBuffers(program); + return program; +} + +IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b) { + IR::Program result{}; + Optimization::VertexATransformPass(vertex_a); + Optimization::VertexBTransformPass(vertex_b); + for (const auto& term : vertex_a.syntax_list) { + if (term.type == IR::AbstractSyntaxNode::Type::Return) { + continue; + } + result.syntax_list.push_back(term); + } + for (const auto& term : vertex_b.syntax_list) { + result.syntax_list.push_back(term); + } + result.blocks = GenerateBlocks(result.syntax_list); + result.post_order_blocks = vertex_b.post_order_blocks; + for (const auto& block : vertex_a.post_order_blocks) { + result.post_order_blocks.push_back(block); + } + result.stage = Stage::VertexB; + result.info = vertex_a.info; + result.local_memory_size = 
std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); + for (size_t index = 0; index < 32; ++index) { + result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; + result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + } + Optimization::JoinTextureInfo(result.info, vertex_b.info); + Optimization::JoinStorageInfo(result.info, vertex_b.info); + Optimization::DeadCodeEliminationPass(result); + Optimization::VerificationPass(result); + Optimization::CollectShaderInfoPass(env_vertex_b, result); + return result; +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h new file mode 100644 index 000000000..1e5536443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h @@ -0,0 +1,22 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "shader_recompiler/environment.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/ir/program.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" + +namespace Shader::Maxwell { + +[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, Environment& env, + Flow::CFG& cfg); + +[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, + Environment& env_vertex_b); + +} // namespace Shader::Maxwell diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 620666622..c05cd5d28 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -22,7 +22,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b17f34cdd..0b6fe8e2e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -20,7 +20,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/environment.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" -#include "shader_recompiler/frontend/maxwell/program.h" +#include "shader_recompiler/frontend/maxwell/translate_program.h" #include "shader_recompiler/program_header.h" #include "video_core/dirty_flags.h" #include 
"video_core/engines/kepler_compute.h" -- cgit v1.2.3 From cbbca26d182991abf68d9b2e1b1e5935bf4eb476 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:03:08 -0300 Subject: shader: Add support for native 16-bit floats --- src/shader_recompiler/CMakeLists.txt | 1 + .../frontend/maxwell/translate_program.cpp | 8 +++++--- .../frontend/maxwell/translate_program.h | 3 ++- src/shader_recompiler/host_translate_info.h | 18 ++++++++++++++++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 12 ++++++++---- src/video_core/renderer_opengl/gl_shader_cache.h | 3 +++ src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 ++++++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 3 +++ src/video_core/vulkan_common/vulkan_device.cpp | 4 ++-- 9 files changed, 50 insertions(+), 14 deletions(-) create mode 100644 src/shader_recompiler/host_translate_info.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index f801a9f72..164e94071 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -211,6 +211,7 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/translate.h frontend/maxwell/translate_program.cpp frontend/maxwell/translate_program.h + host_translate_info.h ir_opt/collect_shader_info_pass.cpp ir_opt/constant_propagation_pass.cpp ir_opt/dead_code_elimination_pass.cpp diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e52170e3e..5250509c1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -13,6 +13,7 @@ #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" #include "shader_recompiler/frontend/maxwell/translate/translate.h" #include "shader_recompiler/frontend/maxwell/translate_program.h" +#include 
"shader_recompiler/host_translate_info.h" #include "shader_recompiler/ir_opt/passes.h" namespace Shader::Maxwell { @@ -120,7 +121,7 @@ void AddNVNStorageBuffers(IR::Program& program) { } // Anonymous namespace IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, - Environment& env, Flow::CFG& cfg) { + Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { IR::Program program; program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); program.blocks = GenerateBlocks(program.syntax_list); @@ -150,8 +151,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& block_pool, Environment& env, - Flow::CFG& cfg); + Flow::CFG& cfg, const HostTranslateInfo& host_info); [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, Environment& env_vertex_b); diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h new file mode 100644 index 000000000..94a584219 --- /dev/null +++ b/src/shader_recompiler/host_translate_info.h @@ -0,0 +1,18 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +namespace Shader { + +// Try to keep entries here to a minimum +// They can accidentally change the cached information in a shader + +/// Misc information about the host +struct HostTranslateInfo { + bool support_float16{}; ///< True when the device supports 16-bit floats + bool support_int64{}; ///< True when the device supports 64-bit integers +}; + +} // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c05cd5d28..b459397f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -201,6 +201,10 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, + }, + host_info{ + .support_float16 = false, + .support_int64 = true, } { if (use_asynchronous_shaders) { workers = CreateWorkers(); @@ -373,15 +377,15 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); for (const auto& desc : programs[index].info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } } else { // VertexB path when VertexA is present. 
- Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; for (const auto& desc : program_vb.info.storage_buffers_descriptors) { total_storage_buffers += desc.count; } @@ -449,7 +453,7 @@ std::unique_ptr ShaderCache::CreateComputePipeline( LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; u32 num_storage_buffers{}; for (const auto& desc : program.info.storage_buffers_descriptors) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index d24b54d90..6952a1f2c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" @@ -82,6 +83,8 @@ private: std::unordered_map> compute_cache; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path shader_cache_filename; std::unique_ptr workers; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0b6fe8e2e..72e6f4207 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -307,6 +307,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw 
.has_broken_signed_operations = false, .ignore_nan_fp_comparisons = false, }; + host_info = Shader::HostTranslateInfo{ + .support_float16 = device.IsFloat16Supported(), + .support_int64 = true, + }; } PipelineCache::~PipelineCache() = default; @@ -484,11 +488,11 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); if (!uses_vertex_a || index != 1) { // Normal path - programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); } else { // VertexB path when VertexA is present. - Shader::IR::Program& program_va{programs[0]}; - Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto& program_va{programs[0]}; + auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); } } @@ -575,7 +579,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; - Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; const std::vector code{EmitSPIRV(profile, program)}; device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 167a2ee2e..42da2960b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -19,6 +19,7 @@ #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/host_translate_info.h" #include 
"shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" #include "video_core/engines/maxwell_3d.h" @@ -157,6 +158,8 @@ private: ShaderPools main_pools; Shader::Profile profile; + Shader::HostTranslateInfo host_info; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9754abcf8..0d8c6cd08 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -497,8 +497,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. - // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); - // is_float16_supported = false; + LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); + is_float16_supported = false; } graphics_queue = logical.GetQueue(graphics_family); -- cgit v1.2.3 From 0ffea97e2ea2c8f58928e13dc2488d620ea98ea8 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 03:22:56 -0300 Subject: shader: Split profile and runtime info headers --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/glasm/emit_context.cpp | 1 + src/shader_recompiler/backend/glasm/emit_glasm.cpp | 1 + src/shader_recompiler/backend/glasm/emit_glasm.h | 1 + .../backend/glasm/emit_glasm_memory.cpp | 2 +- .../backend/glsl/emit_context.cpp | 1 + src/shader_recompiler/backend/glsl/emit_glsl.h | 1 + .../backend/glsl/emit_glsl_instructions.h | 3 - src/shader_recompiler/backend/spirv/emit_context.h | 1 + src/shader_recompiler/profile.h | 72 ------------------- src/shader_recompiler/runtime_info.h | 83 ++++++++++++++++++++++ src/video_core/renderer_opengl/gl_shader_cache.h | 1 + src/video_core/transform_feedback.h | 2 +- 13 files 
changed, 93 insertions(+), 77 deletions(-) create mode 100644 src/shader_recompiler/runtime_info.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 164e94071..f6719ad9d 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -226,6 +226,7 @@ add_library(shader_recompiler STATIC object_pool.h profile.h program_header.h + runtime_info.h shader_info.h ) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 08918a5c2..21e14867c 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glasm/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLASM { namespace { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 832b4fd40..66e4aea04 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -13,6 +13,7 @@ #include "shader_recompiler/frontend/ir/ir_emitter.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLASM { namespace { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index 3df32a4a6..bcb55f062 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h @@ -9,6 +9,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace 
Shader::Backend::GLASM { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index cafb5c92a..af9fac7c1 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -8,7 +8,7 @@ #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLASM { namespace { diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 54aa88b63..93057ebb9 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLSL { namespace { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h index fe221fa7c..20e5719e6 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h @@ -9,6 +9,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLSL { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 9e812dabb..df28036e4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -18,9 +18,6 @@ class Value; namespace 
Shader::Backend::GLSL { class EmitContext; -inline void EmitSetLoopSafetyVariable(EmitContext&) {} -inline void EmitGetLoopSafetyVariable(EmitContext&) {} - #define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__) // Microinstruction emitters diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 961c9180c..527685fb8 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -12,6 +12,7 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/frontend/ir/program.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/shader_info.h" namespace Shader::Backend::SPIRV { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index a3c412a0f..d46be1638 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -4,59 +4,10 @@ #pragma once -#include -#include -#include - #include "common/common_types.h" namespace Shader { -enum class AttributeType : u8 { - Float, - SignedInt, - UnsignedInt, - Disabled, -}; - -enum class InputTopology { - Points, - Lines, - LinesAdjacency, - Triangles, - TrianglesAdjacency, -}; - -enum class CompareFunction { - Never, - Less, - Equal, - LessThanEqual, - Greater, - NotEqual, - GreaterThanEqual, - Always, -}; - -enum class TessPrimitive { - Isolines, - Triangles, - Quads, -}; - -enum class TessSpacing { - Equal, - FractionalOdd, - FractionalEven, -}; - -struct TransformFeedbackVarying { - u32 buffer{}; - u32 stride{}; - u32 offset{}; - u32 components{}; -}; - struct Profile { u32 supported_spirv{0x00010000}; @@ -114,27 +65,4 @@ struct Profile { bool ignore_nan_fp_comparisons{}; }; -struct RuntimeInfo { - std::array generic_input_types{}; - bool convert_depth_mode{}; - bool force_early_z{}; - - TessPrimitive tess_primitive{}; - TessSpacing 
tess_spacing{}; - bool tess_clockwise{}; - - InputTopology input_topology{}; - - std::optional fixed_state_point_size; - std::optional alpha_test_func; - float alpha_test_reference{}; - - // Static y negate value - bool y_negate{}; - // Use storage buffers instead of global pointers on GLASM - bool glasm_use_storage_buffers{}; - - std::vector xfb_varyings; -}; - } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h new file mode 100644 index 000000000..d4b047b4d --- /dev/null +++ b/src/shader_recompiler/runtime_info.h @@ -0,0 +1,83 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" + +namespace Shader { + +enum class AttributeType : u8 { + Float, + SignedInt, + UnsignedInt, + Disabled, +}; + +enum class InputTopology { + Points, + Lines, + LinesAdjacency, + Triangles, + TrianglesAdjacency, +}; + +enum class CompareFunction { + Never, + Less, + Equal, + LessThanEqual, + Greater, + NotEqual, + GreaterThanEqual, + Always, +}; + +enum class TessPrimitive { + Isolines, + Triangles, + Quads, +}; + +enum class TessSpacing { + Equal, + FractionalOdd, + FractionalEven, +}; + +struct TransformFeedbackVarying { + u32 buffer{}; + u32 stride{}; + u32 offset{}; + u32 components{}; +}; + +struct RuntimeInfo { + std::array generic_input_types{}; + bool convert_depth_mode{}; + bool force_early_z{}; + + TessPrimitive tess_primitive{}; + TessSpacing tess_spacing{}; + bool tess_clockwise{}; + + InputTopology input_topology{}; + + std::optional fixed_state_point_size; + std::optional alpha_test_func; + float alpha_test_reference{}; + + // Static y negate value + bool y_negate{}; + // Use storage buffers instead of global pointers on GLASM + bool glasm_use_storage_buffers{}; + + std::vector xfb_varyings; +}; + +} // namespace Shader diff --git 
a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6952a1f2c..ff5707119 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -16,6 +16,7 @@ #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" +#include "shader_recompiler/profile.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h index 6832c6db1..8f6946d65 100644 --- a/src/video_core/transform_feedback.h +++ b/src/video_core/transform_feedback.h @@ -8,7 +8,7 @@ #include #include "common/common_types.h" -#include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/engines/maxwell_3d.h" namespace VideoCommon { -- cgit v1.2.3 From 374eeda1a35f6a1dc81cf22122c701be68e89c0f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 16 Jun 2021 04:59:30 -0300 Subject: shader: Properly manage attributes not written from previous stages --- .../backend/glsl/emit_context.cpp | 26 +++++++--------------- src/shader_recompiler/backend/glsl/emit_context.h | 2 -- .../backend/glsl/emit_glsl_context_get_set.cpp | 5 +++++ .../backend/glsl/emit_glsl_special.cpp | 18 +++++++-------- .../backend/spirv/emit_context.cpp | 3 +++ .../backend/spirv/emit_spirv_context_get_set.cpp | 2 +- .../frontend/maxwell/translate_program.cpp | 4 +++- .../ir_opt/collect_shader_info_pass.cpp | 6 +++-- src/shader_recompiler/runtime_info.h | 8 +++++-- src/shader_recompiler/shader_info.h | 2 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 11 ++++++++- .../renderer_vulkan/vk_pipeline_cache.cpp | 16 +++++++++---- 12 files changed, 62 insertions(+), 41 deletions(-) (limited to 
'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index bd40356a1..14c009535 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -327,11 +327,12 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile for (size_t index = 0; index < info.input_generics.size(); ++index) { const auto& generic{info.input_generics[index]}; - if (generic.used) { - header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, - InterpDecorator(generic.interpolation), index, - InputArrayDecorator(stage)); + if (!generic.used || !runtime_info.previous_stage_stores_generic[index]) { + continue; } + header += + fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, + InterpDecorator(generic.interpolation), index, InputArrayDecorator(stage)); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -349,10 +350,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile } } for (size_t index = 0; index < info.stores_generics.size(); ++index) { - // TODO: Properly resolve attribute issues - if (info.stores_generics[index] || StageInitializesVaryings()) { - DefineGenericOutput(index, program.invocations); + if (!info.stores_generics[index]) { + continue; } + DefineGenericOutput(index, program.invocations); } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); @@ -362,17 +363,6 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile DefineConstants(); } -bool EmitContext::StageInitializesVaryings() const noexcept { - switch (stage) { - case Stage::VertexA: - case Stage::VertexB: - case Stage::Geometry: - return true; - default: - return false; - } -} - void EmitContext::SetupExtensions() { if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { 
header += "#extension GL_EXT_texture_shadow_lod : enable\n"; diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 4a50556e1..8fa87c02c 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -136,8 +136,6 @@ public: code += '\n'; } - [[nodiscard]] bool StageInitializesVaryings() const noexcept; - std::string header; std::string code; VarAlloc var_alloc; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index a241d18fe..663ff3753 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/profile.h" +#include "shader_recompiler/runtime_info.h" namespace Shader::Backend::GLSL { namespace { @@ -179,6 +180,10 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; + if (!ctx.runtime_info.previous_stage_stores_generic[index]) { + ctx.AddF32("{}=0.f;", inst, attr); + return; + } ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); return; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index f8e8aaa67..1a2d3dcea 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -12,11 +12,12 @@ namespace Shader::Backend::GLSL { namespace { -void InitializeVaryings(EmitContext& ctx) { - ctx.Add("gl_Position=vec4(0,0,0,1);"); - // TODO: Properly resolve attribute issues - for (size_t 
index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { - if (!ctx.info.stores_generics[index]) { +void InitializeOutputVaryings(EmitContext& ctx) { + if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { + ctx.Add("gl_Position=vec4(0,0,0,1);"); + } + for (size_t index = 0; index < 16; ++index) { + if (ctx.info.stores_generics[index]) { ctx.Add("out_attr{}=vec4(0,0,0,1);", index); } } @@ -56,9 +57,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& } void EmitPrologue(EmitContext& ctx) { - if (ctx.StageInitializesVaryings()) { - InitializeVaryings(ctx); - } + InitializeOutputVaryings(ctx); + if (ctx.stage == Stage::Fragment && ctx.profile.need_declared_frag_colors) { for (size_t index = 0; index < ctx.info.stores_frag_color.size(); ++index) { if (ctx.info.stores_frag_color[index]) { @@ -73,7 +73,7 @@ void EmitEpilogue(EmitContext&) {} void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); - InitializeVaryings(ctx); + InitializeOutputVaryings(ctx); } void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 007b79650..612d087ad 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -1209,6 +1209,9 @@ void EmitContext::DefineInputs(const Info& info) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } for (size_t index = 0; index < info.input_generics.size(); ++index) { + if (!runtime_info.previous_stage_stores_generic[index]) { + continue; + } const InputVarying generic{info.input_generics[index]}; if (!generic.used) { continue; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 42fff74e3..4ac1fbae5 
100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -286,7 +286,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; - if (!type) { + if (!type || !ctx.runtime_info.previous_stage_stores_generic[index]) { // Attribute is disabled return ctx.Const(0.0f); } diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 5250509c1..ed8729fca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -192,7 +192,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); for (size_t index = 0; index < 32; ++index) { result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - result.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; + if (vertex_b.info.stores_generics[index]) { + result.info.stores_generics[index] = true; + } } Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 47933df97..bab32b58b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -79,7 +79,7 @@ void GetAttribute(Info& info, IR::Attribute attr) { void SetAttribute(Info& info, IR::Attribute attr) { if (IR::IsGeneric(attr)) { - info.stores_generics.at(IR::GenericAttributeIndex(attr)) = true; + info.stores_generics[IR::GenericAttributeIndex(attr)] = 
true; return; } if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { @@ -956,7 +956,9 @@ void GatherInfoFromHeader(Environment& env, Info& info) { } if (info.stores_indexed_attributes) { for (size_t i = 0; i < info.stores_generics.size(); i++) { - info.stores_generics[i] |= header.vtg.IsOutputGenericVectorActive(i); + if (header.vtg.IsOutputGenericVectorActive(i)) { + info.stores_generics[i] = true; + } } info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; info.stores_position |= header.vtg.omap_systemb.position != 0; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index d4b047b4d..63fe2afaf 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include @@ -59,6 +60,8 @@ struct TransformFeedbackVarying { struct RuntimeInfo { std::array generic_input_types{}; + std::bitset<32> previous_stage_stores_generic{}; + bool convert_depth_mode{}; bool force_early_z{}; @@ -72,11 +75,12 @@ struct RuntimeInfo { std::optional alpha_test_func; float alpha_test_reference{}; - // Static y negate value + /// Static Y negate value bool y_negate{}; - // Use storage buffers instead of global pointers on GLASM + /// Use storage buffers instead of global pointers on GLASM bool glasm_use_storage_buffers{}; + /// Transform feedback state for each varying std::vector xfb_varyings; }; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index e9ebc16a4..a20e15d2e 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -140,7 +140,7 @@ struct Info { bool stores_sample_mask{}; bool stores_frag_depth{}; - std::array stores_generics{}; + std::bitset<32> stores_generics{}; bool stores_layer{}; bool stores_viewport_index{}; bool stores_point_size{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp 
b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b459397f5..b8b24dd3d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -58,8 +58,15 @@ auto MakeSpan(Container& container) { Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, const Shader::IR::Program& program, + const Shader::IR::Program* previous_program, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; + if (previous_program) { + info.previous_stage_stores_generic = previous_program->info.stores_generics; + } else { + // Mark all stores as available + info.previous_stage_stores_generic.flip(); + } switch (program.stage) { case Shader::Stage::VertexB: case Shader::Stage::Geometry: @@ -400,6 +407,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array sources; Shader::Backend::Bindings binding; + Shader::IR::Program* previous_program{}; const bool use_glasm{device.UseAssemblyShaders()}; const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) { @@ -413,12 +421,13 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( infos[stage_index] = &program.info; const auto runtime_info{ - MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; + MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } + previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; VideoCore::ShaderNotify* const notify{build_in_parallel ? 
&shader_notify : nullptr}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 72e6f4207..dc028306a 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -90,7 +90,7 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso return {}; } -static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { +Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { if (attr.enabled == 0) { return Shader::AttributeType::Disabled; } @@ -124,9 +124,15 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde } Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, - const Shader::IR::Program& program) { + const Shader::IR::Program& program, + const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; - + if (previous_program) { + info.previous_stage_stores_generic = previous_program->info.stores_generics; + } else { + // Mark all stores as available + info.previous_stage_stores_generic.flip(); + } const Shader::Stage stage{program.stage}; const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; @@ -499,6 +505,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::array infos{}; std::array modules; + const Shader::IR::Program* previous_stage{}; Shader::Backend::Bindings binding; for (size_t index = uses_vertex_a && uses_vertex_b ? 
1 : 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -511,7 +518,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); @@ -519,6 +526,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } + previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; -- cgit v1.2.3 From fcff19e0fa3d21130bc7b6cd50a10db102b5d4d7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 17 Jun 2021 23:12:41 -0400 Subject: shaders: Allow shader notify when async shaders is disabled --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 8 ++++---- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 12 +++++------- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b8b24dd3d..8aaadccc4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -430,10 +430,10 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? 
&shader_notify : nullptr}; - return std::make_unique( - device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - thread_worker, notify, sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, + maxwell3d, program_manager, state_tracker, + thread_worker, &shader_notify, sources, infos, + key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index dc028306a..e83628c13 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -529,11 +529,10 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( previous_stage = &program; } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; - return std::make_unique(maxwell3d, gpu_memory, scheduler, buffer_cache, - texture_cache, notify, device, descriptor_pool, - update_descriptor_queue, thread_worker, - render_pass_cache, key, std::move(modules), infos); + return std::make_unique( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, + descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, + std::move(modules), infos); } catch (const Shader::Exception& exception) { LOG_ERROR(Render_Vulkan, "{}", exception.what()); @@ -596,9 +595,8 @@ std::unique_ptr PipelineCache::CreateComputePipeline( spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; - VideoCore::ShaderNotify* const notify{build_in_parallel ? 
&shader_notify : nullptr}; return std::make_unique(device, descriptor_pool, update_descriptor_queue, - thread_worker, notify, program.info, + thread_worker, &shader_notify, program.info, std::move(spv_module)); } catch (const Shader::Exception& exception) { -- cgit v1.2.3 From 218dedca1f8572bc0e43f8e7ea577f4ece28c4c2 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:00:38 -0300 Subject: gl_graphics_pipeline: Port optimizations from Vulkan pipelines --- .../renderer_opengl/gl_graphics_pipeline.cpp | 180 ++++++++++++++------- .../renderer_opengl/gl_graphics_pipeline.h | 18 ++- 2 files changed, 141 insertions(+), 57 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 8d11fbc55..6b62fa1da 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -15,6 +15,12 @@ #include "video_core/shader_notify.h" #include "video_core/texture_cache/texture_cache.h" +#if defined(_MSC_VER) && defined(NDEBUG) +#define LAMBDA_FORCEINLINE [[msvc::forceinline]] +#else +#define LAMBDA_FORCEINLINE +#endif + namespace OpenGL { namespace { using Shader::ImageBufferDescriptor; @@ -98,13 +104,76 @@ std::pair TransformFeedbackEnum(u8 location) { return {GL_POSITION, 0}; } -struct Spec { +template +bool Passes(const std::array& stage_infos, u32 enabled_mask) { + for (size_t stage = 0; stage < stage_infos.size(); ++stage) { + if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) { + return false; + } + const auto& info{stage_infos[stage]}; + if constexpr (!Spec::has_storage_buffers) { + if (!info.storage_buffers_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_texture_buffers) { + if (!info.texture_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_image_buffers) { + if 
(!info.image_buffer_descriptors.empty()) { + return false; + } + } + if constexpr (!Spec::has_images) { + if (!info.image_descriptors.empty()) { + return false; + } + } + } + return true; +} + +using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); + +template +ConfigureFuncPtr FindSpec(const std::array& stage_infos, u32 enabled_mask) { + if constexpr (sizeof...(Specs) > 0) { + if (!Passes(stage_infos, enabled_mask)) { + return FindSpec(stage_infos, enabled_mask); + } + } + return GraphicsPipeline::MakeConfigureSpecFunc(); +} + +struct SimpleVertexFragmentSpec { + static constexpr std::array enabled_stages{true, false, false, false, true}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct SimpleVertexSpec { + static constexpr std::array enabled_stages{true, false, false, false, false}; + static constexpr bool has_storage_buffers = false; + static constexpr bool has_texture_buffers = false; + static constexpr bool has_image_buffers = false; + static constexpr bool has_images = false; +}; + +struct DefaultSpec { static constexpr std::array enabled_stages{true, true, true, true, true}; static constexpr bool has_storage_buffers = true; static constexpr bool has_texture_buffers = true; static constexpr bool has_image_buffers = true; static constexpr bool has_images = true; }; + +ConfigureFuncPtr ConfigureFunc(const std::array& infos, u32 enabled_mask) { + return FindSpec(infos, enabled_mask); +} } // Anonymous namespace size_t GraphicsPipelineKey::Hash() const noexcept { @@ -129,8 +198,52 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (shader_notify) { shader_notify->MarkShaderBuilding(); } - std::ranges::transform(infos, stage_infos.begin(), - [](const Shader::Info* info) { return info ? 
*info : Shader::Info{}; }); + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + auto& info{stage_infos[stage]}; + if (infos[stage]) { + info = *infos[stage]; + enabled_stages_mask |= 1u << stage; + } + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); + + if (assembly_shaders && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } auto func{[this, device, sources, 
shader_notify, xfb_state](ShaderContext::Context*) mutable { if (!device.UseAssemblyShaders()) { program.handle = glCreateProgram(); @@ -142,7 +255,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } if (device.UseAssemblyShaders()) { assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; } else { AttachShader(Stage(stage), program.handle, code); } @@ -150,49 +262,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c if (!device.UseAssemblyShaders()) { LinkProgram(program.handle); } - u32 num_textures{}; - u32 num_images{}; - u32 num_storage_buffers{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += - AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += - AccumulateCount(info.storage_buffers_descriptors); - } - enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; - std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; - - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; - - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); - - writes_global_memory |= std::ranges::any_of( - info.storage_buffers_descriptors, 
[](const auto& desc) { return desc.is_written; }); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); - - const bool assembly_shaders{assembly_programs[0].handle != 0}; - use_storage_buffers = - !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - writes_global_memory &= !use_storage_buffers; - - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); - } if (shader_notify) { shader_notify->MarkShaderComplete(); } @@ -205,7 +274,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } } -void GraphicsPipeline::Configure(bool is_indexed) { +template +void GraphicsPipeline::ConfigureImpl(bool is_indexed) { std::array image_view_ids; std::array image_view_indices; std::array samplers; @@ -221,7 +291,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - const auto config_stage{[&](size_t stage) { + const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { @@ -311,7 +381,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); ImageId* texture_buffer_index{image_view_ids.data()}; - const auto bind_stage_info{[&](size_t stage) { + const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { size_t index{}; const auto add_buffer{[&](const auto& desc) { constexpr bool is_image = std::is_same_v; @@ -430,6 +500,11 @@ void GraphicsPipeline::Configure(bool is_indexed) { } } +void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { + glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, + xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); +} + void 
GraphicsPipeline::GenerateTransformFeedbackState( const VideoCommon::TransformFeedbackState& xfb_state) { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal @@ -475,9 +550,4 @@ void GraphicsPipeline::GenerateTransformFeedbackState( num_xfb_strides = static_cast(current_stream - xfb_streams.data()); } -void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { - glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, - xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); -} - } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 58deafd3c..a3546daa8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -75,7 +75,9 @@ public: const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); - void Configure(bool is_indexed); + void Configure(bool is_indexed) { + configure_func(this, is_indexed); + } void ConfigureTransformFeedback() const { if (num_xfb_attribs != 0) { @@ -91,11 +93,21 @@ public: return is_built.load(std::memory_order::relaxed); } + template + static auto MakeConfigureSpecFunc() { + return [](GraphicsPipeline* pipeline, bool is_indexed) { + pipeline->ConfigureImpl(is_indexed); + }; + } + private: - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + template + void ConfigureImpl(bool is_indexed); void ConfigureTransformFeedbackImpl() const; + void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -103,6 +115,8 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; + void (*configure_func)(GraphicsPipeline*, bool){}; + OGLProgram program; std::array assembly_programs; u32 enabled_stages_mask{}; -- cgit 
v1.2.3 From f5db8c74405c93b52efbdef318790bd9ec4661c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:23:50 -0300 Subject: gl_shader_cache: Check previous pipeline before checking hash map Port optimization from Vulkan. --- .../renderer_opengl/gl_graphics_pipeline.cpp | 33 ++++++++++------------ .../renderer_opengl/gl_graphics_pipeline.h | 9 ++++-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 20 +++++++++---- src/video_core/renderer_opengl/gl_shader_cache.h | 5 +++- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 3 +- 5 files changed, 41 insertions(+), 29 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 6b62fa1da..92974ba08 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -184,17 +184,15 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc return std::memcmp(this, &rhs, Size()) == 0; } -GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_, - BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, - Tegra::Engines::Maxwell3D& maxwell3d_, - ProgramManager& program_manager_, StateTracker& state_tracker_, - ShaderWorker* thread_worker, - VideoCore::ShaderNotify* shader_notify, - std::array sources, - const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state) - : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, - maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { +GraphicsPipeline::GraphicsPipeline( + const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, 
+ VideoCore::ShaderNotify* shader_notify, std::array sources, + const std::array& infos, const GraphicsPipelineKey& key_) + : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, + state_tracker{state_tracker_}, key{key_} { if (shader_notify) { shader_notify->MarkShaderBuilding(); } @@ -241,10 +239,10 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c writes_global_memory &= !use_storage_buffers; configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); + if (assembly_shaders && key.xfb_enabled) { + GenerateTransformFeedbackState(); } - auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { + auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { if (!device.UseAssemblyShaders()) { program.handle = glCreateProgram(); } @@ -505,15 +503,14 @@ void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); } -void GraphicsPipeline::GenerateTransformFeedbackState( - const VideoCommon::TransformFeedbackState& xfb_state) { +void GraphicsPipeline::GenerateTransformFeedbackState() { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. 
GLint* cursor{xfb_attribs.data()}; GLint* current_stream{xfb_streams.data()}; for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { - const auto& layout = xfb_state.layouts[feedback]; + const auto& layout = key.xfb_state.layouts[feedback]; UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); if (layout.varying_count == 0) { continue; @@ -528,7 +525,7 @@ void GraphicsPipeline::GenerateTransformFeedbackState( } ++current_stream; - const auto& locations = xfb_state.varyings[feedback]; + const auto& locations = key.xfb_state.varyings[feedback]; std::optional current_index; for (u32 offset = 0; offset < layout.varying_count; ++offset) { const u8 location = locations[offset]; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index a3546daa8..a033d4a95 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -73,7 +73,7 @@ public: ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, const std::array& infos, - const VideoCommon::TransformFeedbackState* xfb_state); + const GraphicsPipelineKey& key_); void Configure(bool is_indexed) { configure_func(this, is_indexed); @@ -85,6 +85,10 @@ public: } } + [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept { + return key; + } + [[nodiscard]] bool WritesGlobalMemory() const noexcept { return writes_global_memory; } @@ -106,7 +110,7 @@ private: void ConfigureTransformFeedbackImpl() const; - void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); + void GenerateTransformFeedbackState(); TextureCache& texture_cache; BufferCache& buffer_cache; @@ -114,6 +118,7 @@ private: Tegra::Engines::Maxwell3D& maxwell3d; ProgramManager& program_manager; StateTracker& state_tracker; + const GraphicsPipelineKey key; void (*configure_func)(GraphicsPipeline*, 
bool){}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 8aaadccc4..c36b0d8cf 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -298,6 +298,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (!RefreshStages(graphics_key.unique_hashes)) { + current_pipeline = nullptr; return nullptr; } const auto& regs{maxwell3d.regs}; @@ -313,15 +314,23 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (graphics_key.xfb_enabled) { SetXfbState(graphics_key.xfb_state, regs); } + if (current_pipeline && graphics_key == current_pipeline->Key()) { + return current_pipeline->IsBuilt() ? current_pipeline : nullptr; + } + return CurrentGraphicsPipelineSlowPath(); +} + +GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; - auto& program{pair->second}; + auto& pipeline{pair->second}; if (is_new) { - program = CreateGraphicsPipeline(); + pipeline = CreateGraphicsPipeline(); } - if (!program || !program->IsBuilt()) { + current_pipeline = pipeline.get(); + if (!pipeline || !pipeline->IsBuilt()) { return nullptr; } - return program.get(); + return pipeline.get(); } ComputePipeline* ShaderCache::CurrentComputePipeline() { @@ -432,8 +441,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - thread_worker, &shader_notify, sources, infos, - key.xfb_enabled != 0 ? 
&key.xfb_state : nullptr); + thread_worker, &shader_notify, sources, infos, key); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index ff5707119..16873fcec 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -53,6 +53,8 @@ public: [[nodiscard]] ComputePipeline* CurrentComputePipeline(); private: + GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( @@ -75,9 +77,10 @@ private: ProgramManager& program_manager; StateTracker& state_tracker; VideoCore::ShaderNotify& shader_notify; + const bool use_asynchronous_shaders; GraphicsPipelineKey graphics_key{}; - const bool use_asynchronous_shaders; + GraphicsPipeline* current_pipeline{}; ShaderContext::ShaderPools main_pools; std::unordered_map> graphics_cache; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 42da2960b..efe5a7ed8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -146,12 +146,11 @@ private: BufferCache& buffer_cache; TextureCache& texture_cache; VideoCore::ShaderNotify& shader_notify; + bool use_asynchronous_shaders{}; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; - bool use_asynchronous_shaders{}; - std::unordered_map> compute_cache; std::unordered_map> graphics_cache; -- cgit v1.2.3 From 9bd05313849f76fc64406d5ebf3aadf39fa3bfde Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 02:35:30 -0300 Subject: gl_graphics_pipeline: Inline hash and operator== key functions --- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 10 ---------- src/video_core/renderer_opengl/gl_graphics_pipeline.h | 10 ++++++++-- 2 
files changed, 8 insertions(+), 12 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 92974ba08..ad61a17a5 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -3,9 +3,7 @@ // Refer to the license.txt file included. #include -#include -#include "common/cityhash.h" #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" @@ -176,14 +174,6 @@ ConfigureFuncPtr ConfigureFunc(const std::array& infos, u32 ena } } // Anonymous namespace -size_t GraphicsPipelineKey::Hash() const noexcept { - return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); -} - -bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexcept { - return std::memcmp(this, &rhs, Size()) == 0; -} - GraphicsPipeline::GraphicsPipeline( const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index a033d4a95..f82d712f8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -5,10 +5,12 @@ #pragma once #include +#include #include #include #include "common/bit_field.h" +#include "common/cityhash.h" #include "common/common_types.h" #include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" @@ -44,9 +46,13 @@ struct GraphicsPipelineKey { std::array padding; VideoCommon::TransformFeedbackState xfb_state; - size_t Hash() const noexcept; + size_t Hash() const noexcept { + return static_cast(Common::CityHash64(reinterpret_cast(this), Size())); + } - bool 
operator==(const GraphicsPipelineKey&) const noexcept; + bool operator==(const GraphicsPipelineKey& rhs) const noexcept { + return std::memcmp(this, &rhs, Size()) == 0; + } bool operator!=(const GraphicsPipelineKey& rhs) const noexcept { return !operator==(rhs); -- cgit v1.2.3 From 3877918e9657bcde160080aecc1821cf8cb50ea4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 20 Jun 2021 17:09:50 -0300 Subject: gl_graphics_pipeline: Fix assembly shaders check for transform feedbacks --- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index ad61a17a5..a93b03cf7 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -229,7 +229,7 @@ GraphicsPipeline::GraphicsPipeline( writes_global_memory &= !use_storage_buffers; configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); - if (assembly_shaders && key.xfb_enabled) { + if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { -- cgit v1.2.3 From fb166b5ff4b42279b2c63c69f5b5a35feafa259e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 23 Jun 2021 01:39:21 -0300 Subject: shader: Emulate 64-bit integers when not supported Useful for mobile and Intel Xe devices. 
--- src/shader_recompiler/frontend/maxwell/translate_program.cpp | 3 +++ src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/vulkan_common/vulkan_device.h | 5 +++++ 6 files changed, 16 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e728b43cc..c084f3400 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -154,6 +154,9 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool Date: Wed, 23 Jun 2021 02:41:00 -0300 Subject: shader: Unify shader stage types --- src/shader_recompiler/stage.h | 11 +++++++++-- src/video_core/engines/kepler_compute.cpp | 1 - src/video_core/engines/maxwell_3d.cpp | 1 - src/video_core/engines/maxwell_3d.h | 1 - src/video_core/engines/shader_type.h | 21 --------------------- src/video_core/renderer_opengl/gl_device.cpp | 18 ++++++++++-------- src/video_core/renderer_opengl/gl_device.h | 11 ++++++----- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 -- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 - src/video_core/renderer_opengl/gl_shader_cache.h | 1 - src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 15 ++++++++------- src/video_core/renderer_vulkan/maxwell_to_vk.h | 3 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 -- src/video_core/shader_environment.cpp | 2 +- 15 files changed, 37 insertions(+), 55 deletions(-) delete mode 100644 src/video_core/engines/shader_type.h (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h index 7d4f2c0bb..5c1c8d8fc 
100644 --- a/src/shader_recompiler/stage.h +++ b/src/shader_recompiler/stage.h @@ -9,13 +9,20 @@ namespace Shader { enum class Stage : u32 { - Compute, - VertexA, VertexB, TessellationControl, TessellationEval, Geometry, Fragment, + + Compute, + + VertexA, }; +constexpr u32 MaxStageTypes = 6; + +[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { + return static_cast(static_cast(Stage::VertexB) + index); +} } // namespace Shader diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index cae93c470..492b4c5a3 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 103a51fd0..b18b8a02a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -8,7 +8,6 @@ #include "core/core.h" #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 04d5790f6..fc2c36c6b 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -20,7 +20,6 @@ #include "video_core/engines/const_buffer_info.h" #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" -#include "video_core/engines/shader_type.h" #include "video_core/gpu.h" #include "video_core/macro/macro.h" #include "video_core/textures/texture.h" diff --git 
a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h deleted file mode 100644 index 49ce5cde5..000000000 --- a/src/video_core/engines/shader_type.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" - -namespace Tegra::Engines { - -enum class ShaderType : u32 { - Vertex = 0, - TesselationControl = 1, - TesselationEval = 2, - Geometry = 3, - Fragment = 4, - Compute = 5, -}; -static constexpr std::size_t MaxShaderTypes = 6; - -} // namespace Tegra::Engines diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b1b5ba1ab..27be347e6 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -17,6 +17,7 @@ #include "common/logging/log.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "shader_recompiler/stage.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -59,16 +60,18 @@ bool HasExtension(std::span extensions, std::string_view return std::ranges::find(extensions, extension) != extensions.end(); } -std::array BuildMaxUniformBuffers() noexcept { - std::array max; - std::ranges::transform(LIMIT_UBOS, max.begin(), - [](GLenum pname) { return GetInteger(pname); }); +std::array BuildMaxUniformBuffers() noexcept { + std::array max; + std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger); return max; } bool IsASTCSupported() { - static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; - static constexpr std::array formats = { + static constexpr std::array targets{ + GL_TEXTURE_2D, + GL_TEXTURE_2D_ARRAY, + }; + static constexpr std::array formats{ GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x5_KHR, 
GL_COMPRESSED_RGBA_ASTC_6x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, @@ -84,11 +87,10 @@ bool IsASTCSupported() { GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, }; - static constexpr std::array required_support = { + static constexpr std::array required_support{ GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, }; - for (const GLenum target : targets) { for (const GLenum format : formats) { for (const GLenum support : required_support) { diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 0bd277d38..ad7b01b06 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -6,7 +6,7 @@ #include #include "common/common_types.h" -#include "video_core/engines/shader_type.h" +#include "shader_recompiler/stage.h" namespace OpenGL { @@ -16,8 +16,8 @@ public: [[nodiscard]] std::string GetVendorName() const; - u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { - return max_uniform_buffers[static_cast(shader_type)]; + u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept { + return max_uniform_buffers[static_cast(stage)]; } size_t GetUniformBufferAlignment() const { @@ -148,8 +148,7 @@ private: static bool TestVariableAoffi(); static bool TestPreciseBug(); - std::string vendor_name; - std::array max_uniform_buffers{}; + std::array max_uniform_buffers{}; size_t uniform_buffer_alignment{}; size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; @@ -181,6 +180,8 @@ private: bool has_sparse_texture_2{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; + + std::string vendor_name; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp 
b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e3d336f86..0f0d780b5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -23,7 +23,6 @@ #include "core/memory.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_query_cache.h" @@ -40,7 +39,6 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; using GLvec4 = std::array; -using Tegra::Engines::ShaderType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f2f18b18a..5af9b7745 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -26,7 +26,6 @@ #include "shader_recompiler/profile.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 16873fcec..9d5306293 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -17,7 +17,6 @@ #include "shader_recompiler/host_translate_info.h" #include "shader_recompiler/object_pool.h" #include "shader_recompiler/profile.h" -#include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include 
"video_core/renderer_opengl/gl_shader_context.h" diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 8f0b0b8ec..8f9b9a11a 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; } -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { +VkShaderStageFlagBits ShaderStage(Shader::Stage stage) { switch (stage) { - case Tegra::Engines::ShaderType::Vertex: + case Shader::Stage::VertexA: + case Shader::Stage::VertexB: return VK_SHADER_STAGE_VERTEX_BIT; - case Tegra::Engines::ShaderType::TesselationControl: + case Shader::Stage::TessellationControl: return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - case Tegra::Engines::ShaderType::TesselationEval: + case Shader::Stage::TessellationEval: return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; - case Tegra::Engines::ShaderType::Geometry: + case Shader::Stage::Geometry: return VK_SHADER_STAGE_GEOMETRY_BIT; - case Tegra::Engines::ShaderType::Fragment: + case Shader::Stage::Fragment: return VK_SHADER_STAGE_FRAGMENT_BIT; - case Tegra::Engines::ShaderType::Compute: + case Shader::Stage::Compute: return VK_SHADER_STAGE_COMPUTE_BIT; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 50a599c11..8a9616039 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "shader_recompiler/stage.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/surface.h" #include "video_core/textures/texture.h" @@ -45,7 +46,7 @@ struct FormatInfo { [[nodiscard]] 
FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, PixelFormat pixel_format); -VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); +VkShaderStageFlagBits ShaderStage(Shader::Stage stage); VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2b59a9d88..9eb353a88 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -737,7 +737,7 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .stage = MaxwellToVK::ShaderStage(static_cast(stage)), + .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)), .module = *spv_modules[stage], .pName = "main", .pSpecializationInfo = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c57e16c50..f04c3394c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -58,8 +58,6 @@ struct DrawParams { bool is_indexed; }; -constexpr auto COMPUTE_SHADER_INDEX = static_cast(Tegra::Engines::ShaderType::Compute); - VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 6243cd176..d463e2b56 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 3; +constexpr u32 CACHE_VERSION = 4; constexpr 
size_t INST_SIZE = sizeof(u64); -- cgit v1.2.3 From 7dafa96ab59892b7f1fbffdb61e4326e6443955f Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 02:41:09 -0300 Subject: shader: Rework varyings and implement passthrough geometry shaders Put all varyings into a single std::bitset with helpers to access it. Implement passthrough geometry shaders using host's. --- src/shader_recompiler/CMakeLists.txt | 1 + .../backend/glasm/emit_context.cpp | 15 +- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 6 +- .../backend/glasm/emit_glasm_context_get_set.cpp | 6 +- .../backend/glsl/emit_context.cpp | 58 +++--- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 2 +- .../backend/glsl/emit_glsl_context_get_set.cpp | 2 +- .../backend/glsl/emit_glsl_special.cpp | 4 +- .../backend/spirv/emit_context.cpp | 97 +++++----- src/shader_recompiler/backend/spirv/emit_context.h | 2 +- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 19 +- .../backend/spirv/emit_spirv_context_get_set.cpp | 2 +- src/shader_recompiler/environment.h | 5 + src/shader_recompiler/frontend/ir/attribute.h | 6 + src/shader_recompiler/frontend/ir/program.h | 1 + .../frontend/maxwell/translate_program.cpp | 18 +- .../ir_opt/collect_shader_info_pass.cpp | 202 +++++++-------------- src/shader_recompiler/profile.h | 1 + src/shader_recompiler/program_header.h | 62 +++---- src/shader_recompiler/runtime_info.h | 3 +- src/shader_recompiler/shader_info.h | 37 +--- src/shader_recompiler/varying_state.h | 69 +++++++ src/video_core/engines/maxwell_3d.h | 7 +- src/video_core/renderer_opengl/gl_shader_cache.cpp | 7 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 6 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 16 +- src/video_core/shader_environment.cpp | 10 +- src/video_core/vulkan_common/vulkan_device.cpp | 6 + src/video_core/vulkan_common/vulkan_device.h | 6 + 29 files changed, 345 insertions(+), 331 deletions(-) create mode 100644 src/shader_recompiler/varying_state.h (limited to 
'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 3b5708cb9..b5b7e5e83 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -229,6 +229,7 @@ add_library(shader_recompiler STATIC program_header.h runtime_info.h shader_info.h + varying_state.h ) target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 21e14867c..80dad9ff3 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -83,14 +83,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (generic.used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.loads.Generic(index)) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", - InterpDecorator(generic.interpolation), index, attr_stage, index, index); + InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); } } - if (IsInputArray(stage) && info.loads_position) { + if (IsInputArray(stage) && info.loads.AnyComponent(IR::Attribute::PositionX)) { Add("ATTRIB vertex_position=vertex.position;"); } if (info.uses_invocation_id) { @@ -102,7 +101,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile if (info.stores_tess_level_inner) { Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { @@ -124,8 +123,8 @@ 
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile Add("OUTPUT frag_color{}=result.color[{}];", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 79314f130..2b96977b3 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -296,8 +296,10 @@ void SetupOptions(const IR::Program& program, const Profile& profile, if (info.uses_sparse_residency) { header += "OPTION EXT_sparse_texture2;"; } - if (((info.stores_viewport_index || info.stores_layer) && stage != Stage::Geometry) || - info.stores_viewport_mask) { + const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || + info.stores[IR::Attribute::Layer]}; + if ((stage != Stage::Geometry && stores_viewport_layer) || + info.stores[IR::Attribute::ViewportMask]) { if (profile.support_viewport_index_layer_non_geometry) { header += "OPTION NV_viewport_array2;"; } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index bc195d248..02c9dc6d7 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -261,7 +261,7 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, fmt::format("{}.z", value), fmt::format("{}.w", value)}; read(compare_index, values); }}; - if (ctx.info.loads_position) { + if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { const u32 index{static_cast(IR::Attribute::PositionX)}; if 
(IsInputArray(ctx.stage)) { read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); @@ -269,8 +269,8 @@ void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); } } - for (u32 index = 0; index < ctx.info.input_generics.size(); ++index) { - if (!ctx.info.input_generics[index].used) { + for (u32 index = 0; index < static_cast(IR::NUM_GENERICS); ++index) { + if (!ctx.info.loads.Generic(index)) { continue; } read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 14c009535..0d7f7bc3b 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -212,22 +212,22 @@ std::string_view OutputPrimitive(OutputTopology topology) { } void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { - if (!ctx.info.stores_legacy_varyings) { + if (!ctx.info.stores.Legacy()) { return; } - if (ctx.info.stores_fixed_fnc_textures) { + if (ctx.info.stores.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.stores_color_front_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_FrontColor;"; } - if (ctx.info.stores_color_front_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { header += "vec4 gl_FrontSecondaryColor;"; } - if (ctx.info.stores_color_back_diffuse) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { header += "vec4 gl_BackColor;"; } - if (ctx.info.stores_color_back_specular) { + if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { header += "vec4 gl_BackSecondaryColor;"; } } @@ -237,32 +237,32 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) { return; } header += "out 
gl_PerVertex{vec4 gl_Position;"; - if (ctx.info.stores_point_size) { + if (ctx.info.stores[IR::Attribute::PointSize]) { header += "float gl_PointSize;"; } - if (ctx.info.stores_clip_distance) { + if (ctx.info.stores.ClipDistances()) { header += "float gl_ClipDistance[];"; } - if (ctx.info.stores_viewport_index && ctx.profile.support_viewport_index_layer_non_geometry && - ctx.stage != Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && + ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { header += "int gl_ViewportIndex;"; } SetupLegacyOutPerVertex(ctx, header); header += "};"; - if (ctx.info.stores_viewport_index && ctx.stage == Stage::Geometry) { + if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { header += "out int gl_ViewportIndex;"; } } void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { - if (!ctx.info.loads_legacy_varyings) { + if (!ctx.info.loads.Legacy()) { return; } header += "in gl_PerFragment{"; - if (ctx.info.loads_fixed_fnc_textures) { + if (ctx.info.loads.FixedFunctionTexture()) { header += "vec4 gl_TexCoord[8];"; } - if (ctx.info.loads_color_front_diffuse) { + if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { header += "vec4 gl_Color;"; } header += "};"; @@ -325,14 +325,13 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile SetupOutPerVertex(*this, header); SetupLegacyInPerFragment(*this, header); - for (size_t index = 0; index < info.input_generics.size(); ++index) { - const auto& generic{info.input_generics[index]}; - if (!generic.used || !runtime_info.previous_stage_stores_generic[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { continue; } - header += - fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, - InterpDecorator(generic.interpolation), index, 
InputArrayDecorator(stage)); + header += fmt::format("layout(location={}){}in vec4 in_attr{}{};", index, + InterpDecorator(info.interpolation[index]), index, + InputArrayDecorator(stage)); } for (size_t index = 0; index < info.uses_patches.size(); ++index) { if (!info.uses_patches[index]) { @@ -349,11 +348,10 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); } } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (!info.stores_generics[index]) { - continue; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { + DefineGenericOutput(index, program.invocations); } - DefineGenericOutput(index, program.invocations); } DefineConstantBuffers(bindings); DefineStorageBuffers(bindings); @@ -398,14 +396,14 @@ void EmitContext::SetupExtensions() { header += "#extension GL_NV_shader_thread_shuffle : enable\n"; } } - if ((info.stores_viewport_index || info.stores_layer) && + if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { header += "#extension GL_ARB_sparse_texture2 : enable\n"; } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { header += "#extension GL_NV_viewport_array2 : enable\n"; } if (info.uses_typeless_image_reads) { @@ -535,20 +533,20 @@ void EmitContext::DefineHelperFunctions() { fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " "masked_index=uint(base_index)&3u;switch(base_index>>2){{", vertex_arg)}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { const 
auto position_idx{is_array ? "gl_in[vertex]." : ""}; func += fmt::format("case {}:return {}{}[masked_index];", static_cast(IR::Attribute::PositionX) >> 2, position_idx, position_name); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } const auto vertex_idx{is_array ? "[vertex]" : ""}; func += fmt::format("case {}:return in_attr{}{}[masked_index];", - base_attribute_value + i, i, vertex_idx); + base_attribute_value + index, index, vertex_idx); } func += "default: return 0.0;}}"; header += func; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 32c4f1da2..8deaf5760 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -171,7 +171,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { } std::string GlslVersionSpecifier(const EmitContext& ctx) { - if (ctx.uses_y_direction || ctx.info.stores_legacy_varyings || ctx.info.loads_legacy_varyings) { + if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) { return " compatibility"; } return ""; diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 3d2ba2eee..16e2a8502 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -179,7 +179,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - if (!ctx.runtime_info.previous_stage_stores_generic[index]) { + if 
(!ctx.runtime_info.previous_stage_stores.Generic(index)) { ctx.AddF32("{}=0.f;", inst, attr); return; } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index 6420aaa21..298881c7b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp @@ -20,8 +20,8 @@ void InitializeOutputVaryings(EmitContext& ctx) { if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { ctx.Add("gl_Position=vec4(0,0,0,1);"); } - for (size_t index = 0; index < ctx.info.stores_generics.size(); ++index) { - if (!ctx.info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!ctx.info.stores.Generic(index)) { continue; } const auto& info_array{ctx.output_generics.at(index)}; diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 4c6501129..af4fb0c69 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -557,7 +557,7 @@ void EmitContext::DefineCommonConstants() { } void EmitContext::DefineInterfaces(const IR::Program& program) { - DefineInputs(program.info); + DefineInputs(program); DefineOutputs(program); } @@ -693,16 +693,16 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (u32 i = 0; i < info.input_generics.size(); ++i) { - if (!info.input_generics[i].used) { + for (u32 index = 0; index < static_cast(IR::NUM_GENERICS); ++index) { + if 
(!info.loads.Generic(index)) { continue; } - literals.push_back(base_attribute_value + i); + literals.push_back(base_attribute_value + index); labels.push_back(OpLabel()); } OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); @@ -710,7 +710,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturnValue(Const(0.0f)); size_t label_index{0}; - if (info.loads_position) { + if (info.loads.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{is_array ? OpAccessChain(input_f32, input_position, vertex, masked_index) @@ -719,18 +719,18 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { OpReturnValue(result); ++label_index; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - if (!info.input_generics[i].used) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.loads.Generic(index)) { continue; } AddLabel(labels[label_index]); - const auto type{AttrTypes(*this, static_cast(i))}; + const auto type{AttrTypes(*this, static_cast(index))}; if (!type) { OpReturnValue(Const(0.0f)); ++label_index; continue; } - const Id generic_id{input_generics.at(i)}; + const Id generic_id{input_generics.at(index)}; const Id pointer{is_array ? 
OpAccessChain(type->pointer, generic_id, vertex, masked_index) : OpAccessChain(type->pointer, generic_id, masked_index)}; @@ -758,19 +758,19 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; std::vector literals; std::vector labels; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { literals.push_back(static_cast(IR::Attribute::PositionX) >> 2); labels.push_back(OpLabel()); } const u32 base_attribute_value = static_cast(IR::Attribute::Generic0X) >> 2; - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - literals.push_back(base_attribute_value + static_cast(i)); + literals.push_back(base_attribute_value + static_cast(index)); labels.push_back(OpLabel()); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { literals.push_back(static_cast(IR::Attribute::ClipDistance0) >> 2); labels.push_back(OpLabel()); literals.push_back(static_cast(IR::Attribute::ClipDistance4) >> 2); @@ -781,28 +781,28 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { AddLabel(default_label); OpReturn(); size_t label_index{0}; - if (info.stores_position) { + if (info.stores.AnyComponent(IR::Attribute::PositionX)) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - for (size_t i = 0; i < info.stores_generics.size(); ++i) { - if (!info.stores_generics[i]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (!info.stores.Generic(index)) { continue; } - if (output_generics[i][0].num_components != 4) { + if (output_generics[index][0].num_components != 4) { throw NotImplementedException("Physical stores and transform feedbacks"); } 
AddLabel(labels[label_index]); - const Id generic_id{output_generics[i][0].id}; + const Id generic_id{output_generics[index][0].id}; const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; OpStore(pointer, store_value); OpReturn(); ++label_index; } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { AddLabel(labels[label_index]); const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; OpStore(pointer, store_value); @@ -1146,7 +1146,10 @@ void EmitContext::DefineImages(const Info& info, u32& binding) { } } -void EmitContext::DefineInputs(const Info& info) { +void EmitContext::DefineInputs(const IR::Program& program) { + const Info& info{program.info}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; + if (info.uses_workgroup_id) { workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); } @@ -1183,15 +1186,20 @@ void EmitContext::DefineInputs(const Info& info) { fswzadd_lut_b = ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); } - if (info.loads_primitive_id) { + if (loads[IR::Attribute::PrimitiveId]) { primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); } - if (info.loads_position) { + if (loads.AnyComponent(IR::Attribute::PositionX)) { const bool is_fragment{stage != Stage::Fragment}; const spv::BuiltIn built_in{is_fragment ? 
spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; input_position = DefineInput(*this, F32[4], true, built_in); + if (profile.support_geometry_shader_passthrough) { + if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + Decorate(input_position, spv::Decoration::PassthroughNV); + } + } } - if (info.loads_instance_id) { + if (loads[IR::Attribute::InstanceId]) { if (profile.support_vertex_instance_id) { instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); } else { @@ -1199,7 +1207,7 @@ void EmitContext::DefineInputs(const Info& info) { base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); } } - if (info.loads_vertex_id) { + if (loads[IR::Attribute::VertexId]) { if (profile.support_vertex_instance_id) { vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); } else { @@ -1207,24 +1215,24 @@ void EmitContext::DefineInputs(const Info& info) { base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); } } - if (info.loads_front_face) { + if (loads[IR::Attribute::FrontFace]) { front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); } - if (info.loads_point_coord) { + if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) { point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); } - if (info.loads_tess_coord) { + if (loads[IR::Attribute::TessellationEvaluationPointU] || + loads[IR::Attribute::TessellationEvaluationPointV]) { tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); } - for (size_t index = 0; index < info.input_generics.size(); ++index) { - if (!runtime_info.previous_stage_stores_generic[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const AttributeType input_type{runtime_info.generic_input_types[index]}; + if (!runtime_info.previous_stage_stores.Generic(index)) { continue; } - const InputVarying generic{info.input_generics[index]}; - if (!generic.used) { + if 
(!loads.Generic(index)) { continue; } - const AttributeType input_type{runtime_info.generic_input_types[index]}; if (input_type == AttributeType::Disabled) { continue; } @@ -1234,10 +1242,13 @@ void EmitContext::DefineInputs(const Info& info) { Name(id, fmt::format("in_attr{}", index)); input_generics[index] = id; + if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { + Decorate(id, spv::Decoration::PassthroughNV); + } if (stage != Stage::Fragment) { continue; } - switch (generic.interpolation) { + switch (info.interpolation[index]) { case Interpolation::Smooth: // Default // Decorate(id, spv::Decoration::Smooth); @@ -1266,42 +1277,42 @@ void EmitContext::DefineInputs(const Info& info) { void EmitContext::DefineOutputs(const IR::Program& program) { const Info& info{program.info}; const std::optional invocations{program.invocations}; - if (info.stores_position || stage == Stage::VertexB) { + if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) { output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); } - if (info.stores_point_size || runtime_info.fixed_state_point_size) { + if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing PointSize in fragment stage"); } output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); } - if (info.stores_clip_distance) { + if (info.stores.ClipDistances()) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ClipDistance in fragment stage"); } const Id type{TypeArray(F32[1], Const(8U))}; clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); } - if (info.stores_layer && + if (info.stores[IR::Attribute::Layer] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing Layer 
in fragment stage"); } layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); } - if (info.stores_viewport_index && + if (info.stores[IR::Attribute::ViewportIndex] && (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { if (stage == Stage::Fragment) { throw NotImplementedException("Storing ViewportIndex in fragment stage"); } viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, spv::BuiltIn::ViewportMaskNV); } - for (size_t index = 0; index < info.stores_generics.size(); ++index) { - if (info.stores_generics[index]) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (info.stores.Generic(index)) { DefineGenericOutput(*this, index, invocations); } } diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 527685fb8..e277bc358 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h @@ -300,7 +300,7 @@ private: void DefineAttributeMemAccess(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); - void DefineInputs(const Info& info); + void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); }; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 278c262f8..ddb86d070 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -281,11 +281,19 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); break; } - if 
(program.info.stores_point_size) { + if (program.info.stores[IR::Attribute::PointSize]) { ctx.AddCapability(spv::Capability::GeometryPointSize); } ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); + if (program.is_geometry_passthrough) { + if (ctx.profile.support_geometry_shader_passthrough) { + ctx.AddExtension("SPV_NV_geometry_shader_passthrough"); + ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV); + } else { + LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used with no support"); + } + } break; case Stage::Fragment: execution_model = spv::ExecutionModel::Fragment; @@ -377,20 +385,21 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); } - if (info.stores_viewport_index) { + if (info.stores[IR::Attribute::ViewportIndex]) { ctx.AddCapability(spv::Capability::MultiViewport); } - if (info.stores_viewport_mask && profile.support_viewport_mask) { + if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { ctx.AddExtension("SPV_NV_viewport_array2"); ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); } - if (info.stores_layer || info.stores_viewport_index) { + if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) { if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); } } - if (!profile.support_vertex_instance_id && (info.loads_instance_id || info.loads_vertex_id)) { + if (!profile.support_vertex_instance_id && + (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); 
ctx.AddCapability(spv::Capability::DrawParameters); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 85bd72389..77fbb2b2f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -298,7 +298,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; - if (!type || !ctx.runtime_info.previous_stage_stores_generic[index]) { + if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index)) { // Attribute is disabled return ctx.Const(0.0f); } diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h index 090bc1c08..8369d0d84 100644 --- a/src/shader_recompiler/environment.h +++ b/src/shader_recompiler/environment.h @@ -31,6 +31,10 @@ public: return sph; } + [[nodiscard]] const std::array& GpPassthroughMask() const noexcept { + return gp_passthrough_mask; + } + [[nodiscard]] Stage ShaderStage() const noexcept { return stage; } @@ -41,6 +45,7 @@ public: protected: ProgramHeader sph{}; + std::array gp_passthrough_mask{}; Stage stage{}; u32 start_address{}; }; diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h index 8bf2ddf30..ca1199494 100644 --- a/src/shader_recompiler/frontend/ir/attribute.h +++ b/src/shader_recompiler/frontend/ir/attribute.h @@ -222,6 +222,8 @@ enum class Attribute : u64 { FrontFace = 255, }; +constexpr size_t NUM_GENERICS = 32; + [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); @@ -230,6 +232,10 @@ enum class Attribute : u64 { [[nodiscard]] std::string NameOf(Attribute attribute); +[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute 
attribute, size_t value) noexcept { + return static_cast(static_cast(attribute) + value); +} + } // namespace Shader::IR template <> diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h index 9ede5b48d..ebcaa8bc2 100644 --- a/src/shader_recompiler/frontend/ir/program.h +++ b/src/shader_recompiler/frontend/ir/program.h @@ -27,6 +27,7 @@ struct Program { u32 invocations{}; u32 local_memory_size{}; u32 shared_memory_size{}; + bool is_geometry_passthrough{}; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index a8b727f1a..6b4b0ce5b 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -46,7 +46,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { return; } const ProgramHeader& sph{env.SPH()}; - for (size_t index = 0; index < program.info.input_generics.size(); ++index) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { std::optional imap; for (const PixelImap value : sph.ps.GenericInputMap(static_cast(index))) { if (value == PixelImap::Unused) { @@ -60,7 +60,7 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { if (!imap) { continue; } - program.info.input_generics[index].interpolation = [&] { + program.info.interpolation[index] = [&] { switch (*imap) { case PixelImap::Unused: case PixelImap::Perspective: @@ -140,6 +140,11 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool>(mask); + } break; } case Stage::Compute: @@ -194,12 +199,9 @@ IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b result.stage = Stage::VertexB; result.info = vertex_a.info; result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); - for (size_t index = 0; index < 32; 
++index) { - result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; - if (vertex_b.info.stores_generics[index]) { - result.info.stores_generics[index] = true; - } - } + result.info.loads.mask |= vertex_b.info.loads.mask; + result.info.stores.mask |= vertex_b.info.stores.mask; + Optimization::JoinTextureInfo(result.info, vertex_b.info); Optimization::JoinStorageInfo(result.info, vertex_b.info); Optimization::DeadCodeEliminationPass(result); diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index a82472152..5e32ac784 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -29,130 +29,6 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { }); } -void GetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.input_generics.at(IR::GenericAttributeIndex(attr)).used = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.loads_fixed_fnc_textures = true; - info.loads_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::PrimitiveId: - info.loads_primitive_id = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.loads_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.loads_color_front_diffuse = true; - info.loads_legacy_varyings = true; - break; - case IR::Attribute::PointSpriteS: - case IR::Attribute::PointSpriteT: - info.loads_point_coord = true; - break; - case IR::Attribute::TessellationEvaluationPointU: - case IR::Attribute::TessellationEvaluationPointV: - info.loads_tess_coord = true; - break; - 
case IR::Attribute::InstanceId: - info.loads_instance_id = true; - break; - case IR::Attribute::VertexId: - info.loads_vertex_id = true; - break; - case IR::Attribute::FrontFace: - info.loads_front_face = true; - break; - default: - throw NotImplementedException("Get attribute {}", attr); - } -} - -void SetAttribute(Info& info, IR::Attribute attr) { - if (IR::IsGeneric(attr)) { - info.stores_generics[IR::GenericAttributeIndex(attr)] = true; - return; - } - if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { - info.stores_fixed_fnc_textures = true; - info.stores_legacy_varyings = true; - return; - } - switch (attr) { - case IR::Attribute::Layer: - info.stores_layer = true; - break; - case IR::Attribute::ViewportIndex: - info.stores_viewport_index = true; - break; - case IR::Attribute::PointSize: - info.stores_point_size = true; - break; - case IR::Attribute::PositionX: - case IR::Attribute::PositionY: - case IR::Attribute::PositionZ: - case IR::Attribute::PositionW: - info.stores_position = true; - break; - case IR::Attribute::ColorFrontDiffuseR: - case IR::Attribute::ColorFrontDiffuseG: - case IR::Attribute::ColorFrontDiffuseB: - case IR::Attribute::ColorFrontDiffuseA: - info.stores_color_front_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorFrontSpecularR: - case IR::Attribute::ColorFrontSpecularG: - case IR::Attribute::ColorFrontSpecularB: - case IR::Attribute::ColorFrontSpecularA: - info.stores_color_front_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackDiffuseR: - case IR::Attribute::ColorBackDiffuseG: - case IR::Attribute::ColorBackDiffuseB: - case IR::Attribute::ColorBackDiffuseA: - info.stores_color_back_diffuse = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ColorBackSpecularR: - case IR::Attribute::ColorBackSpecularG: - case IR::Attribute::ColorBackSpecularB: - case IR::Attribute::ColorBackSpecularA: 
- info.stores_color_back_specular = true; - info.stores_legacy_varyings = true; - break; - case IR::Attribute::ClipDistance0: - case IR::Attribute::ClipDistance1: - case IR::Attribute::ClipDistance2: - case IR::Attribute::ClipDistance3: - case IR::Attribute::ClipDistance4: - case IR::Attribute::ClipDistance5: - case IR::Attribute::ClipDistance6: - case IR::Attribute::ClipDistance7: - info.stores_clip_distance = true; - break; - case IR::Attribute::FogCoordinate: - info.stores_fog_coordinate = true; - break; - case IR::Attribute::ViewportMask: - info.stores_viewport_mask = true; - break; - default: - throw NotImplementedException("Set attribute {}", attr); - } -} - void GetPatch(Info& info, IR::Patch patch) { if (!IR::IsGeneric(patch)) { throw NotImplementedException("Reading non-generic patch {}", patch); @@ -511,10 +387,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.uses_demote_to_helper_invocation = true; break; case IR::Opcode::GetAttribute: - GetAttribute(info, inst.Arg(0).Attribute()); + info.loads.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::SetAttribute: - SetAttribute(info, inst.Arg(0).Attribute()); + info.stores.mask[static_cast(inst.Arg(0).Attribute())] = true; break; case IR::Opcode::GetPatch: GetPatch(info, inst.Arg(0).Patch()); @@ -943,26 +819,78 @@ void GatherInfoFromHeader(Environment& env, Info& info) { if (!info.loads_indexed_attributes) { return; } - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.ps.IsGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const size_t offset{static_cast(IR::Attribute::Generic0X) + index * 4}; + const auto vector{header.ps.imap_generic_vector[index]}; + info.loads.mask[offset + 0] = vector.x != PixelImap::Unused; + info.loads.mask[offset + 1] = vector.y != PixelImap::Unused; + info.loads.mask[offset + 2] = vector.z != PixelImap::Unused; + info.loads.mask[offset + 3] = vector.w != 
PixelImap::Unused; } - info.loads_position |= header.ps.imap_systemb.position != 0; return; } if (info.loads_indexed_attributes) { - for (size_t i = 0; i < info.input_generics.size(); i++) { - info.input_generics[i].used |= header.vtg.IsInputGenericVectorActive(i); + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask = header.vtg.InputGeneric(index); + for (size_t i = 0; i < 4; ++i) { + info.loads.Set(attribute + i, mask[i]); + } + } + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.clip_distances}; + info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); } - info.loads_position |= header.vtg.imap_systemb.position != 0; + info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0); + info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0); + info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0); + info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0); + info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0); + info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0); + info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0); + info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0); + info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0); + info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0); + info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.tessellation_eval_point_u != 0); + info.loads.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.tessellation_eval_point_v != 0); + info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 
0); + info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0); + // TODO: Legacy varyings } if (info.stores_indexed_attributes) { - for (size_t i = 0; i < info.stores_generics.size(); i++) { - if (header.vtg.IsOutputGenericVectorActive(i)) { - info.stores_generics[i] = true; + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; + const auto mask{header.vtg.OutputGeneric(index)}; + for (size_t i = 0; i < 4; ++i) { + info.stores.Set(attribute + i, mask[i]); } } - info.stores_clip_distance |= header.vtg.omap_systemc.clip_distances != 0; - info.stores_position |= header.vtg.omap_systemb.position != 0; + for (size_t index = 0; index < 8; ++index) { + const u16 mask{header.vtg.omap_systemc.clip_distances}; + info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); + } + info.stores.Set(IR::Attribute::PrimitiveId, + header.vtg.omap_systemb.primitive_array_id != 0); + info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0); + info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0); + info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0); + info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0); + info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0); + info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0); + info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0); + info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0); + info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0); + info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0); + info.stores.Set(IR::Attribute::TessellationEvaluationPointU, + header.vtg.omap_systemc.tessellation_eval_point_u != 0); 
+ info.stores.Set(IR::Attribute::TessellationEvaluationPointV, + header.vtg.omap_systemc.tessellation_eval_point_v != 0); + info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0); + info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0); + // TODO: Legacy varyings } } } // Anonymous namespace diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index d46be1638..ee1887b56 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -34,6 +34,7 @@ struct Profile { bool support_demote_to_helper_invocation{}; bool support_int64_atomics{}; bool support_derivative_control{}; + bool support_geometry_shader_passthrough{}; bool support_gl_nv_gpu_shader_5{}; bool support_gl_amd_gpu_shader_half_float{}; bool support_gl_texture_shadow_lod{}; diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h index 6933750aa..bd6c2bfb5 100644 --- a/src/shader_recompiler/program_header.h +++ b/src/shader_recompiler/program_header.h @@ -37,7 +37,9 @@ struct ProgramHeader { BitField<15, 1, u32> kills_pixels; BitField<16, 1, u32> does_global_store; BitField<17, 4, u32> sass_version; - BitField<21, 5, u32> reserved; + BitField<21, 2, u32> reserved1; + BitField<24, 1, u32> geometry_passthrough; + BitField<25, 1, u32> reserved2; BitField<26, 1, u32> does_load_or_store; BitField<27, 1, u32> does_fp64; BitField<28, 4, u32> stream_out_mask; @@ -79,24 +81,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } imap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } imap_generic_vector[16]; + 
std::array imap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // ImapColor union { @@ -122,24 +110,10 @@ struct ProgramHeader { BitField<5, 1, u8> position_y; BitField<6, 1, u8> position_z; BitField<7, 1, u8> position_w; - BitField<0, 4, u8> first; - BitField<4, 4, u8> position; u8 raw; } omap_systemb; - union { - BitField<0, 1, u8> x; - BitField<1, 1, u8> y; - BitField<2, 1, u8> z; - BitField<3, 1, u8> w; - BitField<4, 1, u8> x2; - BitField<5, 1, u8> y2; - BitField<6, 1, u8> z2; - BitField<7, 1, u8> w2; - BitField<0, 4, u8> first; - BitField<4, 4, u8> second; - u8 raw; - } omap_generic_vector[16]; + std::array omap_generic_vector; INSERT_PADDING_BYTES_NOINIT(2); // OmapColor @@ -157,18 +131,24 @@ struct ProgramHeader { INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved - [[nodiscard]] bool IsInputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return imap_generic_vector[index >> 1].first != 0; - } - return imap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array InputGeneric(size_t index) const noexcept { + const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } - [[nodiscard]] bool IsOutputGenericVectorActive(size_t index) const { - if ((index & 1) == 0) { - return omap_generic_vector[index >> 1].first != 0; - } - return omap_generic_vector[index >> 1].second != 0; + [[nodiscard]] std::array OutputGeneric(size_t index) const noexcept { + const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)}; + return { + (data & 1) != 0, + (data & 2) != 0, + (data & 4) != 0, + (data & 8) != 0, + }; } } vtg; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 63fe2afaf..f3f83a258 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -10,6 +10,7 @@ #include #include 
"common/common_types.h" +#include "shader_recompiler/varying_state.h" namespace Shader { @@ -60,7 +61,7 @@ struct TransformFeedbackVarying { struct RuntimeInfo { std::array generic_input_types{}; - std::bitset<32> previous_stage_stores_generic{}; + VaryingState previous_stage_stores; bool convert_depth_mode{}; bool force_early_z{}; diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index a20e15d2e..4ef4dbd40 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "shader_recompiler/frontend/ir/type.h" +#include "shader_recompiler/varying_state.h" #include #include @@ -44,11 +45,6 @@ enum class Interpolation { NoPerspective, }; -struct InputVarying { - Interpolation interpolation{Interpolation::Smooth}; - bool used{false}; -}; - struct ConstantBufferDescriptor { u32 index; u32 count; @@ -121,18 +117,10 @@ struct Info { bool uses_subgroup_shuffles{}; std::array uses_patches{}; - std::array input_generics{}; - bool loads_primitive_id{}; - bool loads_position{}; - bool loads_color_front_diffuse{}; - bool loads_fixed_fnc_textures{}; - bool loads_point_coord{}; - bool loads_instance_id{}; - bool loads_vertex_id{}; - bool loads_front_face{}; - bool loads_legacy_varyings{}; - - bool loads_tess_coord{}; + std::array interpolation{}; + VaryingState loads; + VaryingState stores; + VaryingState passthrough; bool loads_indexed_attributes{}; @@ -140,21 +128,6 @@ struct Info { bool stores_sample_mask{}; bool stores_frag_depth{}; - std::bitset<32> stores_generics{}; - bool stores_layer{}; - bool stores_viewport_index{}; - bool stores_point_size{}; - bool stores_position{}; - bool stores_color_front_diffuse{}; - bool stores_color_front_specular{}; - bool stores_color_back_diffuse{}; - bool stores_color_back_specular{}; - bool stores_fixed_fnc_textures{}; - bool stores_clip_distance{}; - bool stores_fog_coordinate{}; - bool stores_viewport_mask{}; - bool 
stores_legacy_varyings{}; - bool stores_tess_level_outer{}; bool stores_tess_level_inner{}; diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h new file mode 100644 index 000000000..9d7b24a76 --- /dev/null +++ b/src/shader_recompiler/varying_state.h @@ -0,0 +1,69 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "shader_recompiler/frontend/ir/attribute.h" + +namespace Shader { + +struct VaryingState { + std::bitset<256> mask{}; + + void Set(IR::Attribute attribute, bool state = true) { + mask[static_cast(attribute)] = state; + } + + [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept { + return mask[static_cast(attribute)]; + } + + [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept { + return mask[static_cast(base) + 0] || mask[static_cast(base) + 1] || + mask[static_cast(base) + 2] || mask[static_cast(base) + 3]; + } + + [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept { + return mask[static_cast(base) + 0] && mask[static_cast(base) + 1] && + mask[static_cast(base) + 2] && mask[static_cast(base) + 3]; + } + + [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept { + return AnyComponent(base) == AllComponents(base); + } + + [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept { + return mask[static_cast(IR::Attribute::Generic0X) + index * 4 + component]; + } + + [[nodiscard]] bool Generic(size_t index) const noexcept { + return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3); + } + + [[nodiscard]] bool ClipDistances() const noexcept { + return AnyComponent(IR::Attribute::ClipDistance0) || + AnyComponent(IR::Attribute::ClipDistance4); + } + + [[nodiscard]] bool Legacy() const noexcept { + return AnyComponent(IR::Attribute::ColorFrontDiffuseR) || + 
AnyComponent(IR::Attribute::ColorFrontSpecularR) || + AnyComponent(IR::Attribute::ColorBackDiffuseR) || + AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture(); + } + + [[nodiscard]] bool FixedFunctionTexture() const noexcept { + for (size_t index = 0; index < 10; ++index) { + if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { + return true; + } + } + return false; + } +}; + +} // namespace Shader diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index da2ded671..471d5686a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -961,7 +961,11 @@ public: SamplerIndex sampler_index; - INSERT_PADDING_WORDS_NOINIT(0x25); + INSERT_PADDING_WORDS_NOINIT(0x2); + + std::array gp_passthrough_mask; + + INSERT_PADDING_WORDS_NOINIT(0x1B); u32 depth_test_enable; @@ -1628,6 +1632,7 @@ ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); ASSERT_REG_POSITION(zeta_depth, 0x48c); ASSERT_REG_POSITION(sampler_index, 0x48D); +ASSERT_REG_POSITION(gp_passthrough_mask, 0x490); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5af9b7745..06e39a503 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -61,10 +61,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, bool glasm_use_storage_buffers, bool use_assembly_shaders) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + // Mark all stores as available for vertex 
shaders + info.previous_stage_stores.mask.set(); } switch (program.stage) { case Shader::Stage::VertexB: @@ -187,6 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), + .support_geometry_shader_passthrough = false, // TODO .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 70e183e65..6d664ed6b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -487,10 +487,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { static_vector vertex_binding_divisors; static_vector vertex_attributes; if (key.state.dynamic_vertex_input) { - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const u32 type = key.state.DynamicAttributeType(index); - if (!input_attributes[index].used || type == 0) { + if (!stage_infos[0].loads.Generic(index) || type == 0) { continue; } vertex_attributes.push_back({ @@ -526,10 +525,9 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { }); } } - const auto& input_attributes = stage_infos[0].input_generics; for (size_t index = 0; index < key.state.attributes.size(); ++index) { const auto& attribute = key.state.attributes[index]; - if (!attribute.enabled || !input_attributes[index].used) { + if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { continue; } vertex_attributes.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 
ec06b124f..7aaa40ef2 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -123,18 +123,21 @@ Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t inde return Shader::AttributeType::Disabled; } -Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineCacheKey& key, +Shader::RuntimeInfo MakeRuntimeInfo(std::span programs, + const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program, const Shader::IR::Program* previous_program) { Shader::RuntimeInfo info; if (previous_program) { - info.previous_stage_stores_generic = previous_program->info.stores_generics; + info.previous_stage_stores = previous_program->info.stores; + if (previous_program->is_geometry_passthrough) { + info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; + } } else { - // Mark all stores as available - info.previous_stage_stores_generic.flip(); + info.previous_stage_stores.mask.set(); } const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != 0}; + const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast(key.state.point_size)}; switch (stage) { @@ -302,6 +305,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_demote_to_helper_invocation = true, .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), .support_derivative_control = true, + .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), @@ -518,7 +522,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program, previous_stage)}; + const 
auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; const std::vector code{EmitSPIRV(profile, runtime_info, program, binding)}; device.SaveShader(code); modules[stage_index] = BuildShader(device, code); diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index d463e2b56..429cab30d 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,7 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 4; +constexpr u32 CACHE_VERSION = 5; constexpr size_t INST_SIZE = sizeof(u64); @@ -155,6 +155,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { .write(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.write(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.write(reinterpret_cast(&gp_passthrough_mask), + sizeof(gp_passthrough_mask)); + } } } @@ -202,6 +206,7 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, u32 start_address_) : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + gp_passthrough_mask = maxwell3d->regs.gp_passthrough_mask; switch (program) { case Maxwell::ShaderProgram::VertexA: stage = Shader::Stage::VertexA; @@ -319,6 +324,9 @@ void FileEnvironment::Deserialize(std::ifstream& file) { .read(reinterpret_cast(&shared_memory_size), sizeof(shared_memory_size)); } else { file.read(reinterpret_cast(&sph), sizeof(sph)); + if (stage == Shader::Stage::Geometry) { + file.read(reinterpret_cast(&gp_passthrough_mask), sizeof(gp_passthrough_mask)); + } } } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 7b184d2f8..da4721e6b 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ 
b/src/video_core/vulkan_common/vulkan_device.cpp @@ -350,6 +350,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_INFO(Render_Vulkan, "Device doesn't support viewport masks"); } + if (!nv_geometry_shader_passthrough) { + LOG_INFO(Render_Vulkan, "Device doesn't support passthrough geometry shaders"); + } + VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; if (khr_uniform_buffer_standard_layout) { std430_layout = { @@ -768,6 +772,8 @@ std::vector Device::LoadExtensions(bool requires_surface) { }; test(nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true); test(nv_viewport_array2, VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME, true); + test(nv_geometry_shader_passthrough, VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME, + true); test(khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index a9c0a0e4d..d0adc0127 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -194,6 +194,11 @@ public: return nv_viewport_array2; } + /// Returns true if the device supports VK_NV_geometry_shader_passthrough. + bool IsNvGeometryShaderPassthroughSupported() const { + return nv_geometry_shader_passthrough; + } + /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. bool IsKhrUniformBufferStandardLayoutSupported() const { return khr_uniform_buffer_standard_layout; @@ -363,6 +368,7 @@ private: bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil. bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. bool nv_viewport_array2{}; ///< Support for VK_NV_viewport_array2. + bool nv_geometry_shader_passthrough{}; ///< Support for VK_NV_geometry_shader_passthrough. 
bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts. bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4. bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. -- cgit v1.2.3 From 8a3427a4c857aa08e365d1776d1f0d9f32639c9c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 24 Jun 2021 17:40:24 -0300 Subject: glasm: Add passthrough geometry shader support --- .../backend/glasm/emit_context.cpp | 5 ++-- src/shader_recompiler/backend/glasm/emit_glasm.cpp | 28 ++++++++++++++++++---- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 ++++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 5 files changed, 33 insertions(+), 8 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index 80dad9ff3..069c019ad 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp @@ -83,13 +83,14 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile break; } const std::string_view attr_stage{stage == Stage::Fragment ? 
"fragment" : "vertex"}; + const VaryingState loads{info.loads.mask | info.passthrough.mask}; for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { - if (info.loads.Generic(index)) { + if (loads.Generic(index)) { Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); } } - if (IsInputArray(stage) && info.loads.AnyComponent(IR::Attribute::PositionX)) { + if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) { Add("ATTRIB vertex_position=vertex.position;"); } if (info.uses_invocation_id) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 2b96977b3..64787b353 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -304,6 +304,9 @@ void SetupOptions(const IR::Program& program, const Profile& profile, header += "OPTION NV_viewport_array2;"; } } + if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) { + header += "OPTION NV_geometry_shader_passthrough;"; + } if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { header += "OPTION EXT_shader_image_load_formatted;"; } @@ -410,11 +413,26 @@ std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, I runtime_info.tess_clockwise ? 
"CW" : "CCW"); break; case Stage::Geometry: - header += fmt::format("PRIMITIVE_IN {};" - "PRIMITIVE_OUT {};" - "VERTICES_OUT {};", - InputPrimitive(runtime_info.input_topology), - OutputPrimitive(program.output_topology), program.output_vertices); + header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology)); + if (program.is_geometry_passthrough) { + if (profile.support_geometry_shader_passthrough) { + for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { + if (program.info.passthrough.Generic(index)) { + header += fmt::format("PASSTHROUGH result.attrib[{}];", index); + } + } + if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) { + header += "PASSTHROUGH result.position;"; + } + } else { + LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported"); + } + } else { + header += + fmt::format("VERTICES_OUT {};" + "PRIMITIVE_OUT {};", + program.output_vertices, OutputPrimitive(program.output_topology)); + } break; case Stage::Compute: header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 27be347e6..6818951f2 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -160,6 +160,7 @@ Device::Device() { has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; has_debugging_tool_attached = IsDebugToolAttached(extensions); has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); + has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough; has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64"); has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ad7b01b06..45ddf5e01 100644 --- 
a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -120,6 +120,10 @@ public: return has_depth_buffer_float; } + bool HasGeometryShaderPassthrough() const { + return has_geometry_shader_passthrough; + } + bool HasNvGpuShader5() const { return has_nv_gpu_shader_5; } @@ -174,6 +178,7 @@ private: bool use_asynchronous_shaders{}; bool use_driver_cache{}; bool has_depth_buffer_float{}; + bool has_geometry_shader_passthrough{}; bool has_nv_gpu_shader_5{}; bool has_shader_int64{}; bool has_amd_shader_half_float{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 06e39a503..af8e9f44d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -187,7 +187,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_demote_to_helper_invocation = false, .support_int64_atomics = false, .support_derivative_control = device.HasDerivativeControl(), - .support_geometry_shader_passthrough = false, // TODO + .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), .support_gl_texture_shadow_lod = device.HasTextureShadowLod(), -- cgit v1.2.3 From 1152d66ddd4e7b29b53e01990fef77e4cff20e24 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Thu, 8 Jul 2021 17:28:48 -0400 Subject: general: Add setting shader_backend GLASM is getting good enough that we can move it out of advanced graphics settings. This removes the setting `use_assembly_shaders`, opting for an enum class `shader_backend`. This comes with the benefits that it is extensible for additional shader backends besides GLSL and GLASM, and this will work better with a QComboBox.
Qt removes the related assembly shader setting from the Advanced Graphics section and places it as a new QComboBox in the API Settings group. This will replace the Vulkan device selector when OpenGL is selected. Additionally, mark all of the custom anisotropic filtering settings as "WILL BREAK THINGS", as that is the case with a select few games. --- src/common/settings.cpp | 4 +- src/common/settings.h | 8 +- src/core/telemetry_session.cpp | 4 +- src/video_core/renderer_opengl/gl_device.cpp | 10 +- src/yuzu/configuration/config.cpp | 7 +- src/yuzu/configuration/config.h | 3 +- src/yuzu/configuration/configure_graphics.cpp | 76 ++++++++----- src/yuzu/configuration/configure_graphics.h | 4 +- src/yuzu/configuration/configure_graphics.ui | 118 ++++++++++++++++----- .../configuration/configure_graphics_advanced.cpp | 7 -- .../configuration/configure_graphics_advanced.h | 1 - .../configuration/configure_graphics_advanced.ui | 18 +--- src/yuzu_cmd/config.cpp | 2 +- src/yuzu_cmd/default_ini.h | 7 +- 14 files changed, 182 insertions(+), 87 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/common/settings.cpp b/src/common/settings.cpp index bf5514386..66268ea0f 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -57,7 +57,7 @@ void LogSettings() { log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); - log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); + log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); @@ -140,7 +140,7 @@ void RestoreGlobalState(bool 
is_powered_on) { values.use_nvdec_emulation.SetGlobal(true); values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); - values.use_assembly_shaders.SetGlobal(true); + values.shader_backend.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); values.use_caches_gc.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index ac0590690..32dfb1d9f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -24,6 +24,12 @@ enum class RendererBackend : u32 { Vulkan = 1, }; +enum class ShaderBackend : u32 { + GLSL = 0, + GLASM = 1, + SPIRV = 2, +}; + enum class GPUAccuracy : u32 { Normal = 0, High = 1, @@ -334,7 +340,7 @@ struct Values { Setting accelerate_astc{true, "accelerate_astc"}; Setting use_vsync{true, "use_vsync"}; BasicSetting disable_fps_limit{false, "disable_fps_limit"}; - Setting use_assembly_shaders{false, "use_assembly_shaders"}; + Setting shader_backend{ShaderBackend::GLASM, "shader_backend"}; Setting use_asynchronous_shaders{false, "use_asynchronous_shaders"}; Setting use_fast_gpu_time{true, "use_fast_gpu_time"}; Setting use_caches_gc{false, "use_caches_gc"}; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 066cb23e4..422de3a7d 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -233,8 +233,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, Settings::values.use_nvdec_emulation.GetValue()); AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); - AddField(field_type, "Renderer_UseAssemblyShaders", - Settings::values.use_assembly_shaders.GetValue()); + AddField(field_type, "Renderer_ShaderBackend", + static_cast(Settings::values.shader_backend.GetValue())); AddField(field_type, "Renderer_UseAsynchronousShaders", 
Settings::values.use_asynchronous_shaders.GetValue()); AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode.GetValue()); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 6818951f2..c4eeed53b 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -172,9 +172,10 @@ Device::Device() { // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() && - GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && - GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + use_assembly_shaders = + Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && + GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && + GLAD_GL_NV_transform_feedback2; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && @@ -187,7 +188,8 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) { + if (Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && + !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index dc69574a9..52b3ed02e 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -814,7 +814,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_nvdec_emulation); ReadGlobalSetting(Settings::values.accelerate_astc); ReadGlobalSetting(Settings::values.use_vsync); - ReadGlobalSetting(Settings::values.use_assembly_shaders); + ReadGlobalSetting(Settings::values.shader_backend); ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); ReadGlobalSetting(Settings::values.use_caches_gc); @@ -1345,7 +1345,10 @@ void Config::SaveRendererValues() { WriteGlobalSetting(Settings::values.use_nvdec_emulation); WriteGlobalSetting(Settings::values.accelerate_astc); WriteGlobalSetting(Settings::values.use_vsync); - WriteGlobalSetting(Settings::values.use_assembly_shaders); + WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), + static_cast(Settings::values.shader_backend.GetValue(global)), + static_cast(Settings::values.shader_backend.GetDefault()), + Settings::values.shader_backend.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); WriteGlobalSetting(Settings::values.use_caches_gc); diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 96f9b6de1..4bbb9f1cd 
100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -180,5 +180,6 @@ private: // These metatype declarations cannot be in common/settings.h because core is devoid of QT Q_DECLARE_METATYPE(Settings::CPUAccuracy); -Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::GPUAccuracy); +Q_DECLARE_METATYPE(Settings::RendererBackend); +Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 4d5b4c0e6..463448dbf 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -26,19 +26,25 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ui->setupUi(this); + ui->backend->addItem(QStringLiteral("GLSL")); + ui->backend->addItem(tr("GLASM (NVIDIA Only)")); + ui->backend->addItem(QStringLiteral("SPIR-V")); + SetupPerGameUI(); SetConfiguration(); connect(ui->api, qOverload(&QComboBox::currentIndexChanged), this, [this] { - UpdateDeviceComboBox(); + UpdateAPILayout(); if (!Settings::IsConfiguringGlobal()) { ConfigurationShared::SetHighlight( - ui->api_layout, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); + ui->api_widget, ui->api->currentIndex() != ConfigurationShared::USE_GLOBAL_INDEX); } }); connect(ui->device, qOverload(&QComboBox::activated), this, [this](int device) { UpdateDeviceSelection(device); }); + connect(ui->backend, qOverload(&QComboBox::activated), this, + [this](int backend) { UpdateShaderBackendSelection(backend); }); connect(ui->bg_button, &QPushButton::clicked, this, [this] { const QColor new_bg_color = QColorDialog::getColor(bg_color); @@ -48,6 +54,10 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) UpdateBackgroundColorButton(new_bg_color); }); + for (const auto& device : vulkan_devices) { + ui->device->addItem(device); + } + ui->bg_label->setVisible(Settings::IsConfiguringGlobal()); 
ui->bg_combobox->setVisible(!Settings::IsConfiguringGlobal()); } @@ -61,12 +71,21 @@ void ConfigureGraphics::UpdateDeviceSelection(int device) { } } +void ConfigureGraphics::UpdateShaderBackendSelection(int backend) { + if (backend == -1) { + return; + } + if (GetCurrentGraphicsBackend() == Settings::RendererBackend::OpenGL) { + shader_backend = static_cast(backend); + } +} + ConfigureGraphics::~ConfigureGraphics() = default; void ConfigureGraphics::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); - ui->api->setEnabled(runtime_lock); + ui->api_widget->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_nvdec_emulation->setEnabled(runtime_lock); @@ -83,7 +102,7 @@ void ConfigureGraphics::SetConfiguration() { ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); - ConfigurationShared::SetHighlight(ui->api_layout, + ConfigurationShared::SetHighlight(ui->api_widget, !Settings::values.renderer_backend.UsingGlobal()); ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, @@ -100,11 +119,10 @@ void ConfigureGraphics::SetConfiguration() { ui->bg_button->setEnabled(!Settings::values.bg_red.UsingGlobal()); ConfigurationShared::SetHighlight(ui->bg_layout, !Settings::values.bg_red.UsingGlobal()); } - UpdateBackgroundColorButton(QColor::fromRgb(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(), Settings::values.bg_blue.GetValue())); - UpdateDeviceComboBox(); + UpdateAPILayout(); } void ConfigureGraphics::ApplyConfiguration() { @@ -128,6 +146,9 @@ void ConfigureGraphics::ApplyConfiguration() { if (Settings::values.renderer_backend.UsingGlobal()) { Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); } + if (Settings::values.shader_backend.UsingGlobal()) { + 
Settings::values.shader_backend.SetValue(shader_backend); + } if (Settings::values.vulkan_device.UsingGlobal()) { Settings::values.vulkan_device.SetValue(vulkan_device); } @@ -139,15 +160,22 @@ void ConfigureGraphics::ApplyConfiguration() { } else { if (ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.renderer_backend.SetGlobal(true); + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(true); } else { Settings::values.renderer_backend.SetGlobal(false); Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); - if (GetCurrentGraphicsBackend() == Settings::RendererBackend::Vulkan) { + switch (GetCurrentGraphicsBackend()) { + case Settings::RendererBackend::OpenGL: + Settings::values.shader_backend.SetGlobal(false); + Settings::values.vulkan_device.SetGlobal(true); + Settings::values.shader_backend.SetValue(shader_backend); + break; + case Settings::RendererBackend::Vulkan: + Settings::values.shader_backend.SetGlobal(true); Settings::values.vulkan_device.SetGlobal(false); Settings::values.vulkan_device.SetValue(vulkan_device); - } else { - Settings::values.vulkan_device.SetGlobal(true); + break; } } @@ -188,32 +216,32 @@ void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) { ui->bg_button->setIcon(color_icon); } -void ConfigureGraphics::UpdateDeviceComboBox() { - ui->device->clear(); - - bool enabled = false; - +void ConfigureGraphics::UpdateAPILayout() { if (!Settings::IsConfiguringGlobal() && ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + vulkan_device = Settings::values.vulkan_device.GetValue(true); + shader_backend = Settings::values.shader_backend.GetValue(true); + ui->device_widget->setEnabled(false); + ui->backend_widget->setEnabled(false); + } else { vulkan_device = Settings::values.vulkan_device.GetValue(); + shader_backend = Settings::values.shader_backend.GetValue(); + ui->device_widget->setEnabled(true); + 
ui->backend_widget->setEnabled(true); } + switch (GetCurrentGraphicsBackend()) { case Settings::RendererBackend::OpenGL: - ui->device->addItem(tr("OpenGL Graphics Device")); - enabled = false; + ui->backend->setCurrentIndex(static_cast(shader_backend)); + ui->device_widget->setVisible(false); + ui->backend_widget->setVisible(true); break; case Settings::RendererBackend::Vulkan: - for (const auto& device : vulkan_devices) { - ui->device->addItem(device); - } ui->device->setCurrentIndex(vulkan_device); - enabled = !vulkan_devices.empty(); + ui->device_widget->setVisible(true); + ui->backend_widget->setVisible(false); break; } - // If in per-game config and use global is selected, don't enable. - enabled &= !(!Settings::IsConfiguringGlobal() && - ui->api->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX); - ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); } void ConfigureGraphics::RetrieveVulkanDevices() try { diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index 6418115cf..c866b911b 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -34,8 +34,9 @@ private: void SetConfiguration(); void UpdateBackgroundColorButton(QColor color); - void UpdateDeviceComboBox(); + void UpdateAPILayout(); void UpdateDeviceSelection(int device); + void UpdateShaderBackendSelection(int backend); void RetrieveVulkanDevices(); @@ -53,4 +54,5 @@ private: std::vector vulkan_devices; u32 vulkan_device{}; + Settings::ShaderBackend shader_backend{}; }; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 5b999d84d..099ddbb7c 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -23,7 +23,7 @@ - + 0 @@ -40,37 +40,107 @@ 6 - - - - API: - + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + Shader Backend: + + + + + + + - - - - - OpenGL + + + + 
+ 0 - - - - Vulkan + + 0 - + + 0 + + + 0 + + + + + Device: + + + + + + + - - - - Device: - + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + API: + + + + + + + + 0 + 0 + + + + + OpenGL + + + + + Vulkan + + + + + - - - diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a9e611125..38276feb1 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -23,12 +23,10 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; void ConfigureGraphicsAdvanced::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); ui->use_vsync->setEnabled(runtime_lock); - ui->use_assembly_shaders->setEnabled(runtime_lock); ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); - ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); @@ -58,8 +56,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, ui->anisotropic_filtering_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders, - ui->use_assembly_shaders, use_assembly_shaders); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, ui->use_asynchronous_shaders, use_asynchronous_shaders); @@ -100,7 +96,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { if (Settings::IsConfiguringGlobal()) { 
ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); - ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal()); ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); @@ -112,8 +107,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { } ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); - ConfigurationShared::SetColoredTristate( - ui->use_assembly_shaders, Settings::values.use_assembly_shaders, use_assembly_shaders); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, Settings::values.use_asynchronous_shaders, use_asynchronous_shaders); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 9148aacf2..7356e6916 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -35,7 +35,6 @@ private: std::unique_ptr ui; ConfigurationShared::CheckState use_vsync; - ConfigurationShared::CheckState use_assembly_shaders; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; ConfigurationShared::CheckState use_caches_gc; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index ad0840355..772e5fed3 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -76,16 +76,6 @@ - - - - Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental. 
- - - Use assembly shaders (experimental, Nvidia OpenGL only) - - - @@ -144,22 +134,22 @@ - 2x + 2x (WILL BREAK THINGS) - 4x + 4x (WILL BREAK THINGS) - 8x + 8x (WILL BREAK THINGS) - 16x + 16x (WILL BREAK THINGS) diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 763df6dd6..640d7d111 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -458,7 +458,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.use_asynchronous_gpu_emulation); ReadSetting("Renderer", Settings::values.use_vsync); ReadSetting("Renderer", Settings::values.disable_fps_limit); - ReadSetting("Renderer", Settings::values.use_assembly_shaders); + ReadSetting("Renderer", Settings::values.shader_backend); ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); ReadSetting("Renderer", Settings::values.use_nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index a6ca7b6cd..b7115b06a 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -248,9 +248,10 @@ max_anisotropy = # 0 (default): Off, 1: On use_vsync = -# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. -# 0: Off, 1 (default): On -use_assembly_shaders = +# Selects the OpenGL shader backend. NV_gpu_program5 is required for GLASM. If NV_gpu_program5 is +# not available and GLASM is selected, GLSL will be used. +# 0: GLSL, 1 (default): GLASM, 2: SPIR-V +shader_backend = # Whether to allow asynchronous shader building. 
# 0 (default): Off, 1: On -- cgit v1.2.3 From fb9b1787f86d069db27fe0af44ded042c6d8de39 Mon Sep 17 00:00:00 2001 From: lat9nq <22451773+lat9nq@users.noreply.github.com> Date: Tue, 22 Jun 2021 01:12:11 -0400 Subject: video_core: Enable GL SPIR-V shaders --- .../renderer_opengl/gl_compute_pipeline.cpp | 17 ++++-- .../renderer_opengl/gl_compute_pipeline.h | 2 +- src/video_core/renderer_opengl/gl_device.cpp | 8 ++- src/video_core/renderer_opengl/gl_device.h | 11 ++++ .../renderer_opengl/gl_graphics_pipeline.cpp | 64 ++++++++++++++-------- .../renderer_opengl/gl_graphics_pipeline.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 40 +++++++++++--- 7 files changed, 105 insertions(+), 38 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 2d6442d74..c63e87a56 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -5,6 +5,7 @@ #include #include "common/cityhash.h" +#include "common/settings.h" // for enum class Settings::ShaderBackend #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -40,15 +41,23 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - std::string code) + std::string code, std::vector code_v) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { - if (device.UseAssemblyShaders()) { - assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); - } else { + switch (device.GetShaderBackend()) { + case 
Settings::ShaderBackend::GLSL: source_program.handle = glCreateProgram(); AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); + break; + case Settings::ShaderBackend::GLASM: + assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); + break; + case Settings::ShaderBackend::SPIRV: + source_program.handle = glCreateProgram(); + AttachShader(GL_COMPUTE_SHADER, source_program.handle, code_v); + LinkProgram(source_program.handle); + break; } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), uniform_buffer_sizes.begin()); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index b5fc45f26..50c676365 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -54,7 +54,7 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, - std::string code); + std::string code, std::vector code_v); void Configure(); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c4eeed53b..99f8769fc 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -177,6 +177,11 @@ Device::Device() { GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + shader_backend = (Settings::values.shader_backend.GetValue() == + Settings::ShaderBackend::GLASM) == use_assembly_shaders + ? Settings::values.shader_backend.GetValue() + : Settings::ShaderBackend::GLSL; + // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && !(is_amd || (is_intel && !is_linux)); @@ -188,8 +193,7 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - if (Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && - !use_assembly_shaders) { + if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 45ddf5e01..ee992aed4 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -8,6 +8,10 @@ #include "common/common_types.h" #include "shader_recompiler/stage.h" +namespace Settings { +enum class ShaderBackend : u32; +}; + namespace OpenGL { class Device { @@ -148,6 +152,10 @@ public: return need_fastmath_off; } + Settings::ShaderBackend GetShaderBackend() const { + return shader_backend; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -159,6 +167,9 @@ private: u32 max_varyings{}; u32 max_compute_shared_memory_size{}; u32 max_glasm_storage_buffer_blocks{}; + + Settings::ShaderBackend shader_backend{}; + bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index a93b03cf7..1f19b5825 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -3,7 +3,11 @@ // Refer to the license.txt file included. 
#include +#include +#include +#include +#include "common/settings.h" // for enum class Settings::ShaderBackend #include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" @@ -179,7 +183,8 @@ GraphicsPipeline::GraphicsPipeline( Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, - const std::array& infos, const GraphicsPipelineKey& key_) + std::array, 5> sources_spirv, const std::array& infos, + const GraphicsPipelineKey& key_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_}, key{key_} { @@ -232,29 +237,44 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{[this, device, sources, shader_notify](ShaderContext::Context*) mutable { - if (!device.UseAssemblyShaders()) { - program.handle = glCreateProgram(); - } - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{sources[stage]}; - if (code.empty()) { - continue; + auto func{ + [this, device, sources, sources_spirv, shader_notify](ShaderContext::Context*) mutable { + if (!device.UseAssemblyShaders()) { + program.handle = glCreateProgram(); } - if (device.UseAssemblyShaders()) { - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - } else { - AttachShader(Stage(stage), program.handle, code); + for (size_t stage = 0; stage < 5; ++stage) { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } break; + case Settings::ShaderBackend::GLASM: { + const auto code{sources[stage]}; + if 
(code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + } break; + case Settings::ShaderBackend::SPIRV: { + const auto code{sources_spirv[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } break; + } } - } - if (!device.UseAssemblyShaders()) { - LinkProgram(program.handle); - } - if (shader_notify) { - shader_notify->MarkShaderComplete(); - } - is_built.store(true, std::memory_order_relaxed); - }}; + if (!device.UseAssemblyShaders()) { + LinkProgram(program.handle); + } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + is_built.store(true, std::memory_order_relaxed); + }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); } else { diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index f82d712f8..5f5d57385 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -78,6 +78,7 @@ public: ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, std::array sources, + std::array, 5> sources_spirv, const std::array& infos, const GraphicsPipelineKey& key_); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index af8e9f44d..cde0f54c9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -15,6 +15,7 @@ #include "common/fs/path_util.h" #include "common/logging/log.h" #include "common/scope_exit.h" +#include "common/settings.h" #include "common/thread_worker.h" #include "core/core.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" @@ -415,6 +416,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( OGLProgram source_program; std::array sources; + std::array, 5> sources_spirv; 
Shader::Backend::Bindings binding; Shader::IR::Program* previous_program{}; const bool use_glasm{device.UseAssemblyShaders()}; @@ -431,17 +433,23 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)}; - if (use_glasm) { - sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); - } else { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::GLASM: + sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); + break; + case Settings::ShaderBackend::SPIRV: + sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding); + break; } previous_program = &program; } auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; - return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - maxwell3d, program_manager, state_tracker, - thread_worker, &shader_notify, sources, infos, key); + return std::make_unique( + device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + thread_worker, &shader_notify, sources, sources_spirv, infos, key); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -478,10 +486,24 @@ std::unique_ptr ShaderCache::CreateComputePipeline( } Shader::RuntimeInfo info; info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - const std::string code{device.UseAssemblyShaders() ? 
EmitGLASM(profile, info, program) - : EmitGLSL(profile, program)}; + + std::string code{}; + std::vector code_spirv; + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + code = EmitGLSL(profile, program); + break; + case Settings::ShaderBackend::GLASM: + code = EmitGLASM(profile, info, program); + break; + case Settings::ShaderBackend::SPIRV: + code_spirv = EmitSPIRV(profile, program); + break; + } + return std::make_unique(device, texture_cache, buffer_cache, gpu_memory, - kepler_compute, program_manager, program.info, code); + kepler_compute, program_manager, program.info, code, + code_spirv); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); return nullptr; -- cgit v1.2.3 From 5643a909bc3fa9f497d2f2e68650f823ed2944ac Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Jun 2021 01:14:06 -0300 Subject: shader: Fix disabled and unwritten attributes and varyings --- .../backend/glsl/emit_glsl_context_get_set.cpp | 8 +++-- .../backend/spirv/emit_spirv_context_get_set.cpp | 6 +++- src/video_core/renderer_opengl/renderer_opengl.cpp | 35 ++++++++++++---------- 3 files changed, 31 insertions(+), 18 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 16e2a8502..d5424301b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -179,8 +179,12 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, const char swizzle{"xyzw"[element]}; if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; - if (!ctx.runtime_info.previous_stage_stores.Generic(index)) { - ctx.AddF32("{}=0.f;", inst, attr); + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + if (element == 3) { + ctx.AddF32("{}=1.f;", inst, 
attr); + } else { + ctx.AddF32("{}=0.f;", inst, attr); + } return; } ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 77fbb2b2f..756de0a27 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -298,10 +298,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { if (IR::IsGeneric(attr)) { const u32 index{IR::GenericAttributeIndex(attr)}; const std::optional type{AttrTypes(ctx, index)}; - if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index)) { + if (!type) { // Attribute is disabled return ctx.Const(0.0f); } + if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { + // Varying component is not written + return ctx.Const(type && element == 3 ? 1.0f : 0.0f); + } const Id generic_id{ctx.input_generics.at(index)}; const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; const Id value{ctx.OpLoad(type->id, pointer)}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b8777643b..dab0afe6d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -140,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, } AddTelemetryFields(); InitOpenGLObjects(); + + // Initialize default attributes to match hardware's disabled attributes + GLint max_attribs{}; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs); + for (GLint attrib = 0; attrib < max_attribs; ++attrib) { + glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 0.0f); + } + // Enable seamless cubemaps when per texture parameters are not available + if 
(!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { + glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); + } + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); + + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } RendererOpenGL::~RendererOpenGL() = default; @@ -256,21 +276,6 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); - - // Enable seamless cubemaps when per texture parameters are not available - if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { - glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); - } - - // Enable unified vertex attributes and query vertex buffer address when the driver supports it - if (device.HasVertexBufferUnifiedMemory()) { - glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); - glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); - - glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); - glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, - &vertex_buffer_address); - } } void RendererOpenGL::AddTelemetryFields() { -- cgit v1.2.3 From fba6bd92d456b4d472ed37e663006fafeef154a9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 26 Jun 2021 17:46:01 -0300 Subject: vk_rasterizer: Workaround bug in VK_EXT_vertex_input_dynamic_state Workaround potential bug on Nvidia's driver where only updating high attributes leaves low attributes out dated. 
--- src/video_core/engines/maxwell_3d.h | 4 --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 31 +++++++++++++--------- 4 files changed, 20 insertions(+), 19 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 471d5686a..1aa43523a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -305,10 +305,6 @@ public: return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); } - bool IsConstant() const { - return constant; - } - bool IsValid() const { return size != Size::Invalid; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0f0d780b5..41d2b73f4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -97,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() { const auto gl_index = static_cast(index); // Disable constant attributes. - if (attrib.IsConstant()) { + if (attrib.constant) { glDisableVertexAttribArray(gl_index); continue; } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index d089da8a4..d70153df3 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -128,7 +128,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, const auto& input = regs.vertex_attrib_format[index]; auto& attribute = attributes[index]; attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.enabled.Assign(input.constant ? 
0 : 1); attribute.buffer.Assign(input.buffer); attribute.offset.Assign(input.offset); attribute.type.Assign(static_cast(input.type.Value())); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index bb7301c53..99576b826 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -801,25 +801,30 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) boost::container::static_vector bindings; boost::container::static_vector attributes; + // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up + // generating dirty state. Track the highest dirty attribute and update all attributes until + // that one. + size_t highest_dirty_attr{}; for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - if (!dirty[Dirty::VertexAttribute0 + index]) { - continue; + if (dirty[Dirty::VertexAttribute0 + index]) { + highest_dirty_attr = index; } + } + for (size_t index = 0; index < highest_dirty_attr; ++index) { const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; const u32 binding{attribute.buffer}; dirty[Dirty::VertexAttribute0 + index] = false; dirty[Dirty::VertexBinding0 + static_cast(binding)] = true; - - attributes.push_back({ - .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, - .pNext = nullptr, - .location = static_cast(index), - .binding = binding, - .format = attribute.IsConstant() - ? 
VK_FORMAT_A8B8G8R8_UNORM_PACK32 - : MaxwellToVK::VertexFormat(attribute.type, attribute.size), - .offset = attribute.offset, - }); + if (!attribute.constant) { + attributes.push_back({ + .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, + .pNext = nullptr, + .location = static_cast(index), + .binding = binding, + .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size), + .offset = attribute.offset, + }); + } } for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { if (!dirty[Dirty::VertexBinding0 + index]) { -- cgit v1.2.3 From 8722668b3c027f0132d0be07e867247debd08d30 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 29 Jun 2021 18:42:17 -0300 Subject: emit_spirv: Workaround VK_KHR_shader_float_controls on fp16 Nvidia Fix regression on Fire Emblem: Three Houses when using native fp16. --- src/shader_recompiler/backend/spirv/emit_spirv.cpp | 13 ++++++++----- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 + 4 files changed, 12 insertions(+), 5 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index ddb86d070..d7a86e270 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -319,7 +319,7 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit Id main_func) { const Info& info{program.info}; if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { - LOG_WARNING(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); } else if (info.uses_fp32_denorms_flush) { if (profile.support_fp32_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); @@ -332,15 +332,15 @@ void 
SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); } else { - LOG_WARNING(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); + LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); } } - if (!profile.support_separate_denorm_behavior) { + if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) { // No separate denorm behavior return; } if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { - LOG_WARNING(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); } else if (info.uses_fp16_denorms_flush) { if (profile.support_fp16_denorm_flush) { ctx.AddCapability(spv::Capability::DenormFlushToZero); @@ -353,13 +353,16 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddCapability(spv::Capability::DenormPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); } else { - LOG_WARNING(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); + LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); } } } void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, EmitContext& ctx, Id main_func) { + if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) { + return; + } if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index ee1887b56..6ff12387b 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -58,6 
+58,8 @@ struct Profile { bool has_broken_unsigned_image_offsets{}; /// Signed instructions with unsigned data types are misinterpreted bool has_broken_signed_operations{}; + /// Float controls break when fp16 is enabled + bool has_broken_fp16_float_controls{}; /// Dynamic vec4 indexing is broken on some OpenGL drivers bool has_gl_component_indexing_bug{}; /// The precise type qualifier is broken in the fragment stage of some drivers diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index cde0f54c9..2ea9c9f07 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -206,6 +206,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, .has_broken_signed_operations = true, + .has_broken_fp16_float_controls = false, .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7aaa40ef2..87b843e3d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -315,6 +315,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, .has_broken_unsigned_image_offsets = false, .has_broken_signed_operations = false, + .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, .ignore_nan_fp_comparisons = false, }; host_info = Shader::HostTranslateInfo{ -- cgit v1.2.3 From dbee32d302a5944bc8e99b55d956013503b66c6c Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Thu, 1 Jul 2021 20:32:30 -0400 Subject: 
gl_shader_cache: Fixes for async shaders --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 25 ++++++++++++++++++++-- src/video_core/renderer_opengl/gl_shader_cache.h | 2 ++ 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2ea9c9f07..2d7eb3e33 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -328,11 +328,32 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() { if (is_new) { pipeline = CreateGraphicsPipeline(); } + if (!pipeline) { + return nullptr; + } current_pipeline = pipeline.get(); - if (!pipeline || !pipeline->IsBuilt()) { + return BuiltPipeline(current_pipeline); +} + +GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { + if (pipeline->IsBuilt()) { + return pipeline; + } + if (!use_asynchronous_shaders) { + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which + // will be used one time. + if (maxwell3d.regs.zeta_enable) { return nullptr; } - return pipeline.get(); + // If games are using a small index count, we can assume these are full screen quads. 
+ // Usually these shaders are only used once for building textures so we can assume they + // can't be built async + if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { + return pipeline; + } + return nullptr; } ComputePipeline* ShaderCache::CurrentComputePipeline() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 9d5306293..a34110b37 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -54,6 +54,8 @@ public: private: GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); + [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; + std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( -- cgit v1.2.3 From 11f04f1022d0820a1fdba38221ecd38f19d86d9e Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 4 Jul 2021 00:34:53 -0400 Subject: shader: Ignore global memory ops on devices lacking int64 support --- .../backend/glsl/emit_context.cpp | 6 ++-- .../backend/glsl/emit_glsl_memory.cpp | 34 ++++++++++++++++---- .../backend/spirv/emit_context.cpp | 2 +- .../backend/spirv/emit_spirv_memory.cpp | 36 ++++++++++++++++++---- src/shader_recompiler/frontend/ir/opcodes.inc | 28 ++++++++--------- src/shader_recompiler/profile.h | 1 + src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 1 + 8 files changed, 79 insertions(+), 30 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 0dcdff152..e08d2d2eb 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -378,7 +378,7 @@ void EmitContext::SetupExtensions() { if (info.uses_shadow_lod && 
profile.support_gl_texture_shadow_lod) { header += "#extension GL_EXT_texture_shadow_lod : enable\n"; } - if (info.uses_int64) { + if (info.uses_int64 && profile.support_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } if (info.uses_int64_bit_atomics) { @@ -402,7 +402,7 @@ void EmitContext::SetupExtensions() { info.uses_subgroup_shuffles || info.uses_fswzadd) { header += "#extension GL_ARB_shader_ballot : enable\n" "#extension GL_ARB_shader_group_vote : enable\n"; - if (!info.uses_int64) { + if (!info.uses_int64 && profile.support_int64) { header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; } if (profile.support_gl_warp_intrinsics) { @@ -539,7 +539,7 @@ void EmitContext::DefineHelperFunctions() { if (info.uses_atomic_s32_max) { header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; } - if (info.uses_global_memory) { + if (info.uses_global_memory && profile.support_int64) { header += DefineGlobalMemoryFunctions(); } if (info.loads_indexed_attributes) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index daef5fb84..e3957491f 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -7,6 +7,7 @@ #include "shader_recompiler/backend/glsl/emit_context.h" #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/profile.h" namespace Shader::Backend::GLSL { namespace { @@ -38,15 +39,27 @@ void EmitLoadGlobalS16(EmitContext&) { } void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) { - ctx.AddU32("{}=LoadGlobal32({});", inst, address); + if (ctx.profile.support_int64) { + return ctx.AddU32("{}=LoadGlobal32({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32("{}=0u;", inst); } void 
EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) { - ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); + if (ctx.profile.support_int64) { + return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x2("{}=uvec2(0);", inst); } void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) { - ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); + if (ctx.profile.support_int64) { + return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); + ctx.AddU32x4("{}=uvec4(0);", inst); } void EmitWriteGlobalU8(EmitContext&) { @@ -66,15 +79,24 @@ void EmitWriteGlobalS16(EmitContext&) { } void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { - ctx.Add("WriteGlobal32({},{});", address, value); + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal32({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { - ctx.Add("WriteGlobal64({},{});", address, value); + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal64({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { - ctx.Add("WriteGlobal128({},{});", address, value); + if (ctx.profile.support_int64) { + return ctx.Add("WriteGlobal128({},{});", address, value); + } + LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); } void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 
865f34291..2d29d8c14 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp @@ -830,7 +830,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { } void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { - if (!info.uses_global_memory) { + if (!info.uses_global_memory || !profile.support_int64) { return; } using DefPtr = Id StorageDefinitions::*; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index ccebf170d..679ee2684 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -84,15 +84,27 @@ void EmitLoadGlobalS16(EmitContext&) { } Id EmitLoadGlobal32(EmitContext& ctx, Id address) { - return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u); } Id EmitLoadGlobal64(EmitContext& ctx, Id address) { - return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u); } Id EmitLoadGlobal128(EmitContext& ctx, Id address) { - return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); + if (ctx.profile.support_int64) { + return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); + return ctx.Const(0u, 0u, 0u, 0u); } void EmitWriteGlobalU8(EmitContext&) { @@ -112,15 +124,27 @@ void EmitWriteGlobalS16(EmitContext&) { } void 
EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { - ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { - ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); } void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { - ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); + if (ctx.profile.support_int64) { + ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); + return; + } + LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); } Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 9af750283..d91098c80 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -71,20 +71,20 @@ OPCODE(UndefU32, U32, OPCODE(UndefU64, U64, ) // Memory operations -OPCODE(LoadGlobalU8, U32, U64, ) -OPCODE(LoadGlobalS8, U32, U64, ) -OPCODE(LoadGlobalU16, U32, U64, ) -OPCODE(LoadGlobalS16, U32, U64, ) -OPCODE(LoadGlobal32, U32, U64, ) -OPCODE(LoadGlobal64, U32x2, U64, ) -OPCODE(LoadGlobal128, U32x4, U64, ) -OPCODE(WriteGlobalU8, Void, U64, U32, ) -OPCODE(WriteGlobalS8, Void, U64, U32, ) -OPCODE(WriteGlobalU16, Void, U64, U32, ) -OPCODE(WriteGlobalS16, Void, U64, U32, ) -OPCODE(WriteGlobal32, Void, U64, U32, ) -OPCODE(WriteGlobal64, Void, U64, U32x2, ) -OPCODE(WriteGlobal128, 
Void, U64, U32x4, ) +OPCODE(LoadGlobalU8, U32, Opaque, ) +OPCODE(LoadGlobalS8, U32, Opaque, ) +OPCODE(LoadGlobalU16, U32, Opaque, ) +OPCODE(LoadGlobalS16, U32, Opaque, ) +OPCODE(LoadGlobal32, U32, Opaque, ) +OPCODE(LoadGlobal64, U32x2, Opaque, ) +OPCODE(LoadGlobal128, U32x4, Opaque, ) +OPCODE(WriteGlobalU8, Void, Opaque, U32, ) +OPCODE(WriteGlobalS8, Void, Opaque, U32, ) +OPCODE(WriteGlobalU16, Void, Opaque, U32, ) +OPCODE(WriteGlobalS16, Void, Opaque, U32, ) +OPCODE(WriteGlobal32, Void, Opaque, U32, ) +OPCODE(WriteGlobal64, Void, Opaque, U32x2, ) +OPCODE(WriteGlobal128, Void, Opaque, U32x4, ) // Storage buffer operations OPCODE(LoadStorageU8, U32, U32, U32, ) diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 6ff12387b..501dcaf71 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -15,6 +15,7 @@ struct Profile { bool support_descriptor_aliasing{}; bool support_int8{}; bool support_int16{}; + bool support_int64{}; bool support_vertex_instance_id{}; bool support_float_controls{}; bool support_separate_denorm_behavior{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2d7eb3e33..58a4f0fb4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -168,6 +168,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .support_descriptor_aliasing = false, .support_int8 = false, .support_int16 = false, + .support_int64 = device.HasShaderInt64(), .support_vertex_instance_id = true, .support_float_controls = false, .support_separate_denorm_behavior = false, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 87b843e3d..a2646fc6d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -280,6 +280,7 
@@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .support_descriptor_aliasing = true, .support_int8 = true, .support_int16 = device.IsShaderInt16Supported(), + .support_int64 = device.IsShaderInt64Supported(), .support_vertex_instance_id = false, .support_float_controls = true, .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == -- cgit v1.2.3 From 8390286a89dd259f0ff44cc95fc20d017b58046f Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 9 Jul 2021 19:00:11 -0400 Subject: renderers: Disable async shader compilation The current implementation is prone to causing graphical issues. Disable until a better solution is implemented. --- src/video_core/renderer_opengl/gl_device.cpp | 6 ++++-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 99f8769fc..563b291cd 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -182,9 +182,11 @@ Device::Device() { ? Settings::values.shader_backend.GetValue() : Settings::ShaderBackend::GLSL; + // Completely disable async shaders for now, as it causes graphical glitches + use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 
- use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && - !(is_amd || (is_intel && !is_linux)); + // use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && + // !(is_amd || (is_intel && !is_linux)); use_driver_cache = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a2646fc6d..39db35175 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -269,7 +269,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, - use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, + use_asynchronous_shaders{false}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; -- cgit v1.2.3 From 41493fbe89200a4a8321dec7b313872435c57df7 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 11 Jul 2021 01:04:52 -0400 Subject: renderers: Fix clang formatting --- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13 +++++++++---- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5 ++--- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp 
index dab0afe6d..c9cfa6366 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,8 +24,8 @@ #include "video_core/host_shaders/opengl_present_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/textures/decoders.h" diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6fda06a7e..a8d04dc61 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -97,14 +97,19 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr context_) try - : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), + : RendererBase(emu_window, std::move(context_)), + telemetry_session(telemetry_session_), + cpu_memory(cpu_memory_), + gpu(gpu_), + library(OpenLibrary()), instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? 
CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window)), - device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), - state_tracker(gpu), scheduler(device, state_tracker), + device(CreateDevice(instance, dld, *surface)), + memory_allocator(device, false), + state_tracker(gpu), + scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f0ae0b0d6..18482e1d0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -503,9 +503,8 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { vertex_attributes.push_back({ .location = static_cast(index), .binding = 0, - .format = type == 1 ? VK_FORMAT_R32_SFLOAT - : type == 2 ? VK_FORMAT_R32_SINT - : VK_FORMAT_R32_UINT, + .format = type == 1 ? VK_FORMAT_R32_SFLOAT + : type == 2 ? 
VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT, .offset = 0, }); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 99576b826..c7a07fdd8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -141,7 +141,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), - query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, + query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), wfi_event(device.GetLogical().CreateEvent()) { scheduler.SetQueryCache(query_cache); -- cgit v1.2.3 From 8c166c68d46d160162caa9b588f1e762c57e52f4 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 11 Jul 2021 23:26:13 -0300 Subject: gl_shader_cache: Properly implement asynchronous shaders --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 58a4f0fb4..24f035c37 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -318,7 +318,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { SetXfbState(graphics_key.xfb_state, regs); } if (current_pipeline && graphics_key == current_pipeline->Key()) { - return current_pipeline->IsBuilt() ? 
current_pipeline : nullptr; + return BuiltPipeline(current_pipeline); } return CurrentGraphicsPipelineSlowPath(); } -- cgit v1.2.3 From 94af0a00f67c9f28fcaf170458e55b7a95de76bf Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 12 Jul 2021 02:03:25 -0400 Subject: glsl: Clamp shared mem size to GL_MAX_COMPUTE_SHARED_MEMORY_SIZE --- src/shader_recompiler/backend/glsl/emit_glsl.cpp | 11 +++++++++-- src/shader_recompiler/profile.h | 2 ++ src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 + 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index ffdc6dbba..c5e819a0a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -218,8 +218,15 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; ctx.header.insert(0, version); if (program.shared_memory_size > 0) { - ctx.header += - fmt::format("shared uint smem[{}];", Common::DivCeil(program.shared_memory_size, 4U)); + const auto requested_size{program.shared_memory_size}; + const auto max_size{profile.gl_max_compute_smem_size}; + const bool needs_clamp{requested_size > max_size}; + if (needs_clamp) { + LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})", + requested_size, max_size); + } + const auto smem_size{needs_clamp ? 
max_size : requested_size}; + ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U)); } ctx.header += "void main(){\n"; if (program.local_memory_size > 0) { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 501dcaf71..f0c3b3b17 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -67,6 +67,8 @@ struct Profile { bool has_gl_precise_bug{}; /// Ignores SPIR-V ordered vs unordered using GLSL semantics bool ignore_nan_fp_comparisons{}; + + u32 gl_max_compute_smem_size{}; }; } // namespace Shader diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 24f035c37..7ecafc862 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -211,6 +211,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), .has_gl_precise_bug = device.HasPreciseBug(), .ignore_nan_fp_comparisons = true, + .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), }, host_info{ .support_float16 = false, -- cgit v1.2.3 From e1ed218b418cd1ed94f6f25ccd0db86b63bd6bb5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 14 Jul 2021 03:48:30 -0300 Subject: renderer_opengl: Use ARB_separate_shader_objects Ensures that states set for a particular stage are not attached to other stages which may not need them. 
--- .../renderer_opengl/gl_compute_pipeline.cpp | 10 +-- .../renderer_opengl/gl_graphics_pipeline.cpp | 62 ++++++------- .../renderer_opengl/gl_graphics_pipeline.h | 2 +- src/video_core/renderer_opengl/gl_shader_manager.h | 100 ++++++++++++++++----- src/video_core/renderer_opengl/gl_shader_util.cpp | 57 ++++++------ src/video_core/renderer_opengl/gl_shader_util.h | 6 +- src/video_core/renderer_opengl/renderer_opengl.cpp | 11 ++- src/video_core/renderer_opengl/renderer_opengl.h | 3 +- src/video_core/renderer_opengl/util_shaders.cpp | 19 ++-- 9 files changed, 154 insertions(+), 116 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index c63e87a56..aa1cc592f 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -46,17 +46,13 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} { switch (device.GetShaderBackend()) { case Settings::ShaderBackend::GLSL: - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code); - LinkProgram(source_program.handle); + source_program = CreateProgram(code, GL_COMPUTE_SHADER); break; case Settings::ShaderBackend::GLASM: assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); break; case Settings::ShaderBackend::SPIRV: - source_program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, source_program.handle, code_v); - LinkProgram(source_program.handle); + source_program = CreateProgram(code_v, GL_COMPUTE_SHADER); break; } std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), @@ -154,7 +150,7 @@ void ComputePipeline::Configure() { if (assembly_program.handle != 0) { 
program_manager.BindComputeAssemblyProgram(assembly_program.handle); } else { - program_manager.BindProgram(source_program.handle); + program_manager.BindComputeProgram(source_program.handle); } buffer_cache.UnbindComputeTextureBuffers(); size_t texbuf_index{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 1f19b5825..c8b2d833d 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -237,44 +237,32 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{ - [this, device, sources, sources_spirv, shader_notify](ShaderContext::Context*) mutable { - if (!device.UseAssemblyShaders()) { - program.handle = glCreateProgram(); - } - for (size_t stage = 0; stage < 5; ++stage) { - switch (device.GetShaderBackend()) { - case Settings::ShaderBackend::GLSL: { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } - AttachShader(Stage(stage), program.handle, code); - } break; - case Settings::ShaderBackend::GLASM: { - const auto code{sources[stage]}; - if (code.empty()) { - continue; - } - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - } break; - case Settings::ShaderBackend::SPIRV: { - const auto code{sources_spirv[stage]}; - if (code.empty()) { - continue; - } - AttachShader(Stage(stage), program.handle, code); - } break; + auto func{[this, device, sources, sources_spirv, + shader_notify](ShaderContext::Context*) mutable { + for (size_t stage = 0; stage < 5; ++stage) { + switch (device.GetShaderBackend()) { + case Settings::ShaderBackend::GLSL: + if (!sources[stage].empty()) { + source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); } + break; + case Settings::ShaderBackend::GLASM: + if (!sources[stage].empty()) { + assembly_programs[stage] = CompileProgram(sources[stage], 
AssemblyStage(stage)); + } + break; + case Settings::ShaderBackend::SPIRV: + if (!sources_spirv[stage].empty()) { + source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage)); + } + break; } - if (!device.UseAssemblyShaders()) { - LinkProgram(program.handle); - } - if (shader_notify) { - shader_notify->MarkShaderComplete(); - } - is_built.store(true, std::memory_order_relaxed); - }}; + } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + is_built = true; + }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); } else { @@ -449,7 +437,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (assembly_programs[0].handle != 0) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { - program_manager.BindProgram(program.handle); + program_manager.BindSourcePrograms(source_programs); } const ImageId* views_it{image_view_ids.data()}; GLsizei texture_binding = 0; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 5f5d57385..5e34b9537 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -129,7 +129,7 @@ private: void (*configure_func)(GraphicsPipeline*, bool){}; - OGLProgram program; + std::array source_programs; std::array assembly_programs; u32 enabled_stages_mask{}; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 88b734bcb..d7ef0775d 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -24,34 +24,68 @@ class ProgramManager { public: explicit ProgramManager(const Device& device) { + glCreateProgramPipelines(1, &pipeline.handle); if (device.UseAssemblyShaders()) { glEnable(GL_COMPUTE_PROGRAM_NV); } } - void BindProgram(GLuint program) { - if (current_source_program == program) { - return; - } 
- current_source_program = program; + void BindComputeProgram(GLuint program) { glUseProgram(program); + is_compute_bound = true; } void BindComputeAssemblyProgram(GLuint program) { - if (current_compute_assembly_program != program) { - current_compute_assembly_program = program; + if (current_assembly_compute_program != program) { + current_assembly_compute_program = program; glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); } - if (current_source_program != 0) { - current_source_program = 0; - glUseProgram(0); + UnbindPipeline(); + } + + void BindSourcePrograms(std::span programs) { + static constexpr std::array stage_enums{ + GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT, + GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT, + }; + for (size_t stage = 0; stage < NUM_STAGES; ++stage) { + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; + glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle); + } + } + BindPipeline(); + } + + void BindPresentPrograms(GLuint vertex, GLuint fragment) { + if (current_programs[0] != vertex) { + current_programs[0] = vertex; + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex); + } + if (current_programs[4] != fragment) { + current_programs[4] = fragment; + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment); + } + glUseProgramStages( + pipeline.handle, + GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); + current_programs[1] = 0; + current_programs[2] = 0; + current_programs[3] = 0; + + if (current_stage_mask != 0) { + current_stage_mask = 0; + for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) { + glDisable(program_type); + } } + BindPipeline(); } void BindAssemblyPrograms(std::span programs, u32 stage_mask) { - const u32 changed_mask = current_assembly_mask ^ stage_mask; - current_assembly_mask = stage_mask; + const u32 changed_mask = 
current_stage_mask ^ stage_mask; + current_stage_mask = stage_mask; if (changed_mask != 0) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { @@ -65,25 +99,47 @@ public: } } for (size_t stage = 0; stage < NUM_STAGES; ++stage) { - if (current_assembly_programs[stage] != programs[stage].handle) { - current_assembly_programs[stage] = programs[stage].handle; + if (current_programs[stage] != programs[stage].handle) { + current_programs[stage] = programs[stage].handle; glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle); } } - if (current_source_program != 0) { - current_source_program = 0; - glUseProgram(0); - } + UnbindPipeline(); } void RestoreGuestCompute() {} private: - GLuint current_source_program = 0; + void BindPipeline() { + if (!is_pipeline_bound) { + is_pipeline_bound = true; + glBindProgramPipeline(pipeline.handle); + } + UnbindCompute(); + } + + void UnbindPipeline() { + if (is_pipeline_bound) { + is_pipeline_bound = false; + glBindProgramPipeline(0); + } + UnbindCompute(); + } + + void UnbindCompute() { + if (is_compute_bound) { + is_compute_bound = false; + glUseProgram(0); + } + } + + OGLPipeline pipeline; + bool is_pipeline_bound{}; + bool is_compute_bound{}; - u32 current_assembly_mask = 0; - std::array current_assembly_programs{}; - GLuint current_compute_assembly_program = 0; + u32 current_stage_mask = 0; + std::array current_programs{}; + GLuint current_assembly_compute_program = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5109985f1..d432072ad 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -13,6 +13,33 @@ namespace OpenGL { +static OGLProgram LinkSeparableProgram(GLuint shader) { + OGLProgram program; + program.handle = glCreateProgram(); + glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + glAttachShader(program.handle, shader); 
+ glLinkProgram(program.handle); + if (!Settings::values.renderer_debug) { + return program; + } + GLint link_status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status); + + GLint log_length{}; + glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length); + if (log_length == 0) { + return program; + } + std::string log(log_length, 0); + glGetProgramInfoLog(program.handle, log_length, nullptr, log.data()); + if (link_status == GL_FALSE) { + LOG_ERROR(Render_OpenGL, "{}", log); + } else { + LOG_WARNING(Render_OpenGL, "{}", log); + } + return program; +} + static void LogShader(GLuint shader, std::string_view code = {}) { GLint shader_status{}; glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status); @@ -36,7 +63,7 @@ static void LogShader(GLuint shader, std::string_view code = {}) { } } -void AttachShader(GLenum stage, GLuint program, std::string_view code) { +OGLProgram CreateProgram(std::string_view code, GLenum stage) { OGLShader shader; shader.handle = glCreateShader(stage); @@ -44,45 +71,23 @@ void AttachShader(GLenum stage, GLuint program, std::string_view code) { const GLchar* const code_ptr = code.data(); glShaderSource(shader.handle, 1, &code_ptr, &length); glCompileShader(shader.handle); - glAttachShader(program, shader.handle); if (Settings::values.renderer_debug) { LogShader(shader.handle, code); } + return LinkSeparableProgram(shader.handle); } -void AttachShader(GLenum stage, GLuint program, std::span code) { +OGLProgram CreateProgram(std::span code, GLenum stage) { OGLShader shader; shader.handle = glCreateShader(stage); glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), static_cast(code.size_bytes())); glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); - glAttachShader(program, shader.handle); if (Settings::values.renderer_debug) { LogShader(shader.handle); } -} - -void LinkProgram(GLuint program) { - glLinkProgram(program); - if (!Settings::values.renderer_debug) { - return; - } - GLint 
link_status{}; - glGetProgramiv(program, GL_LINK_STATUS, &link_status); - - GLint log_length{}; - glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); - if (log_length == 0) { - return; - } - std::string log(log_length, 0); - glGetProgramInfoLog(program, log_length, nullptr, log.data()); - if (link_status == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "{}", log); - } else { - LOG_WARNING(Render_OpenGL, "{}", log); - } + return LinkSeparableProgram(shader.handle); } OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) { diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index ff5aa024f..4e1a2a8e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -17,11 +17,9 @@ namespace OpenGL { -void AttachShader(GLenum stage, GLuint program, std::string_view code); +OGLProgram CreateProgram(std::string_view code, GLenum stage); -void AttachShader(GLenum stage, GLuint program, std::span code); - -void LinkProgram(GLuint program); +OGLProgram CreateProgram(std::span code, GLenum stage); OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c9cfa6366..d15167e19 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -251,10 +251,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color void RendererOpenGL::InitOpenGLObjects() { // Create shader programs - present_program.handle = glCreateProgram(); - AttachShader(GL_VERTEX_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_VERT); - AttachShader(GL_FRAGMENT_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_FRAG); - LinkProgram(present_program.handle); + present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); + 
present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); @@ -340,8 +338,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { // Set projection matrix const std::array ortho_matrix = MakeOrthographicMatrix(static_cast(layout.width), static_cast(layout.height)); - program_manager.BindProgram(present_program.handle); - glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); + program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle); + glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, + ortho_matrix.data()); const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index b3ee55665..d455f572f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -110,7 +110,8 @@ private: // OpenGL object IDs OGLSampler present_sampler; OGLBuffer vertex_buffer; - OGLProgram present_program; + OGLProgram present_vertex; + OGLProgram present_fragment; OGLFramebuffer screenshot_framebuffer; // GPU address of the vertex buffer diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8aa0683c8..37a4d1d9d 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -42,12 +42,7 @@ using VideoCore::Surface::BytesPerBlock; namespace { OGLProgram MakeProgram(std::string_view source) { - OGLProgram program; - OGLShader shader; - program.handle = glCreateProgram(); - AttachShader(GL_COMPUTE_SHADER, program.handle, source); - LinkProgram(program.handle); - return program; + return CreateProgram(source, GL_COMPUTE_SHADER); } size_t NumPixelsInCopy(const VideoCommon::ImageCopy& 
copy) { @@ -84,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), }; - program_manager.BindProgram(astc_decoder_program.handle); + program_manager.BindComputeProgram(astc_decoder_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); @@ -132,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_INPUT_BUFFER = 1; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; - program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle); glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); @@ -171,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); - program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); + program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); @@ -220,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), "Non-power of two images are not implemented"); - program_manager.BindProgram(pitch_unswizzle_program.handle); + program_manager.BindComputeProgram(pitch_unswizzle_program.handle); glFlushMappedNamedBufferRange(map.buffer, 
map.offset, image.guest_size_bytes); glUniform2ui(LOC_ORIGIN, 0, 0); glUniform2i(LOC_DESTINATION, 0, 0); @@ -248,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span Date: Sat, 17 Jul 2021 00:59:57 -0400 Subject: gl_device: Simplify GLASM setting logic --- src/video_core/renderer_opengl/gl_device.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 563b291cd..6afe6c1e1 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -172,16 +172,14 @@ Device::Device() { // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - use_assembly_shaders = - Settings::values.shader_backend.GetValue() == Settings::ShaderBackend::GLASM && - GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && - GLAD_GL_NV_transform_feedback2; - - shader_backend = (Settings::values.shader_backend.GetValue() == - Settings::ShaderBackend::GLASM) == use_assembly_shaders - ? Settings::values.shader_backend.GetValue() - : Settings::ShaderBackend::GLSL; - + shader_backend = Settings::values.shader_backend.GetValue(); + use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM && + GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && + GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { + LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); + shader_backend = Settings::ShaderBackend::GLSL; + } // Completely disable async shaders for now, as it causes graphical glitches use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 
@@ -194,11 +192,6 @@ Device::Device() { LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}", has_broken_texture_view_formats); - - if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) { - LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); - } - if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) { LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported"); } -- cgit v1.2.3 From 56478bc9ac5a01ca5c73ba72faae1a5eaae0f8cb Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sat, 17 Jul 2021 16:16:23 -0400 Subject: shader: Fix disabled attribute default values --- src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 2 +- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 756de0a27..fb8c02a77 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -300,7 +300,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { const std::optional type{AttrTypes(ctx, index)}; if (!type) { // Attribute is disabled - return ctx.Const(0.0f); + return ctx.Const(element == 3 ? 
1.0f : 0.0f); } if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { // Varying component is not written diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index d15167e19..285e78384 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -145,7 +145,7 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, GLint max_attribs{}; glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs); for (GLint attrib = 0; attrib < max_attribs; ++attrib) { - glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 0.0f); + glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f); } // Enable seamless cubemaps when per texture parameters are not available if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { -- cgit v1.2.3 From 258f35515d61d01049d2e433146cab808837bb7d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 18 Jul 2021 21:07:12 -0300 Subject: shader_environment: Receive cache version from outside This allows us to invalidate OpenGL and Vulkan separately in the future.
--- src/video_core/renderer_opengl/gl_shader_cache.cpp | 10 +++++++--- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 9 ++++++--- src/video_core/shader_environment.cpp | 11 +++++------ src/video_core/shader_environment.h | 9 +++++---- 4 files changed, 23 insertions(+), 16 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 7ecafc862..8d6cc074c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -48,9 +48,12 @@ using VideoCommon::ComputeEnvironment; using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +using VideoCommon::LoadPipelines; using VideoCommon::SerializePipeline; using Context = ShaderContext::Context; +constexpr u32 CACHE_VERSION = 5; + template auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); @@ -287,7 +290,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, }); ++state.total; }}; - VideoCommon::LoadPipelines(stop_loading, shader_cache_filename, load_compute, load_graphics); + LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics); std::unique_lock lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); @@ -394,7 +397,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { env_ptrs.push_back(&environments.envs[index]); } } - SerializePipeline(graphics_key, env_ptrs, shader_cache_filename); + SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION); return pipeline; } @@ -492,7 +495,8 @@ std::unique_ptr ShaderCache::CreateComputePipeline( if (!pipeline || shader_cache_filename.empty()) { return pipeline; } - SerializePipeline(key, std::array{&env}, shader_cache_filename); + SerializePipeline(key, std::array{&env}, 
shader_cache_filename, + CACHE_VERSION); return pipeline; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 39db35175..2ce8b4156 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -54,6 +54,8 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; +constexpr u32 CACHE_VERSION = 5; + template auto MakeSpan(Container& container) { return std::span(container.data(), container.size()); @@ -434,7 +436,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading }); ++state.total; }}; - VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute, + load_graphics); std::unique_lock lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); @@ -562,7 +565,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { env_ptrs.push_back(&envs[index]); } } - SerializePipeline(key, env_ptrs, pipeline_cache_filename); + SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION); }); return pipeline; } @@ -581,7 +584,7 @@ std::unique_ptr PipelineCache::CreateComputePipeline( } serialization_thread.QueueWork([this, key, env = std::move(env)] { SerializePipeline(key, std::array{&env}, - pipeline_cache_filename); + pipeline_cache_filename, CACHE_VERSION); }); return pipeline; } diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 429cab30d..8a4581c19 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -22,7 +22,6 @@ namespace VideoCommon { constexpr std::array MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION = 5; constexpr size_t INST_SIZE = sizeof(u64); 
@@ -370,7 +369,7 @@ std::array FileEnvironment::WorkgroupSize() const { } void SerializePipeline(std::span key, std::span envs, - const std::filesystem::path& filename) try { + const std::filesystem::path& filename, u32 cache_version) try { std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); file.exceptions(std::ifstream::failbit); if (!file.is_open()) { @@ -381,7 +380,7 @@ void SerializePipeline(std::span key, std::span(&CACHE_VERSION), sizeof(CACHE_VERSION)); + .write(reinterpret_cast(&cache_version), sizeof(cache_version)); } if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { return; @@ -402,7 +401,7 @@ void SerializePipeline(std::span key, std::span load_compute, Common::UniqueFunction> load_graphics) try { std::ifstream file(filename, std::ios::binary | std::ios::ate); @@ -417,13 +416,13 @@ void LoadPipelines( u32 cache_version; file.read(magic_number.data(), magic_number.size()) .read(reinterpret_cast(&cache_version), sizeof(cache_version)); - if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + if (magic_number != MAGIC_NUMBER || cache_version != expected_cache_version) { file.close(); if (Common::FS::RemoveFile(filename)) { if (magic_number != MAGIC_NUMBER) { LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); } - if (cache_version != CACHE_VERSION) { + if (cache_version != expected_cache_version) { LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); } } else { diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index d26dbfaab..2079979db 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h @@ -164,18 +164,19 @@ private: }; void SerializePipeline(std::span key, std::span envs, - const std::filesystem::path& filename); + const std::filesystem::path& filename, u32 cache_version); template -void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) { +void 
SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename, + u32 cache_version) { static_assert(std::is_trivially_copyable_v); static_assert(std::has_unique_object_representations_v); SerializePipeline(std::span(reinterpret_cast(&key), sizeof(key)), - std::span(envs.data(), envs.size()), filename); + std::span(envs.data(), envs.size()), filename, cache_version); } void LoadPipelines( - std::stop_token stop_loading, const std::filesystem::path& filename, + std::stop_token stop_loading, const std::filesystem::path& filename, u32 expected_cache_version, Common::UniqueFunction load_compute, Common::UniqueFunction> load_graphics); -- cgit v1.2.3 From 8381490a04f4618ec5be90904815b409e3f4ca59 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 03:05:41 -0300 Subject: opengl: Fix asynchronous shaders Wait for shader to build before configuring it, and wait for the shader to build before sharing it with other contexts. --- .../renderer_opengl/gl_graphics_pipeline.cpp | 30 +++++++++++++++++++--- .../renderer_opengl/gl_graphics_pipeline.h | 7 ++++- 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c8b2d833d..fac0034fb 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -237,10 +237,12 @@ GraphicsPipeline::GraphicsPipeline( if (key.xfb_enabled && device.UseAssemblyShaders()) { GenerateTransformFeedbackState(); } - auto func{[this, device, sources, sources_spirv, - shader_notify](ShaderContext::Context*) mutable { + const bool in_parallel = thread_worker != nullptr; + const auto backend = device.GetShaderBackend(); + auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv), + shader_notify, backend, in_parallel](ShaderContext::Context*) mutable 
{ for (size_t stage = 0; stage < 5; ++stage) { - switch (device.GetShaderBackend()) { + switch (backend) { case Settings::ShaderBackend::GLSL: if (!sources[stage].empty()) { source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); @@ -249,6 +251,10 @@ GraphicsPipeline::GraphicsPipeline( case Settings::ShaderBackend::GLASM: if (!sources[stage].empty()) { assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); + if (in_parallel) { + // Make sure program is built before continuing when building in parallel + glGetString(GL_PROGRAM_ERROR_STRING_NV); + } } break; case Settings::ShaderBackend::SPIRV: @@ -258,10 +264,20 @@ GraphicsPipeline::GraphicsPipeline( break; } } + if (in_parallel && backend != Settings::ShaderBackend::GLASM) { + // Make sure programs have built if we are building shaders in parallel + for (OGLProgram& program : source_programs) { + if (program.handle != 0) { + GLint status{}; + glGetProgramiv(program.handle, GL_LINK_STATUS, &status); + } + } + } if (shader_notify) { shader_notify->MarkShaderComplete(); } is_built = true; + built_condvar.notify_one(); }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); @@ -434,6 +450,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); + if (!is_built.load(std::memory_order::relaxed)) { + WaitForBuild(); + } if (assembly_programs[0].handle != 0) { program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask); } else { @@ -545,4 +564,9 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { num_xfb_strides = static_cast(current_stream - xfb_streams.data()); } +void GraphicsPipeline::WaitForBuild() { + std::unique_lock lock{built_mutex}; + built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h 
b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 5e34b9537..4e28d9a42 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -119,6 +119,8 @@ private: void GenerateTransformFeedbackState(); + void WaitForBuild(); + TextureCache& texture_cache; BufferCache& buffer_cache; Tegra::MemoryManager& gpu_memory; @@ -143,13 +145,16 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; - std::atomic_bool is_built{false}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; GLsizei num_xfb_strides{}; std::array xfb_attribs{}; std::array xfb_streams{}; + + std::mutex built_mutex; + std::condition_variable built_condvar; + std::atomic_bool is_built{false}; }; } // namespace OpenGL -- cgit v1.2.3 From 3c6d440015d7ffb81eedbfcd7ee1aab1ea87ee2a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 20 Jul 2021 03:08:06 -0300 Subject: Revert "renderers: Disable async shader compilation" This reverts commit 4a152767286717fa69bfc94846a124a366f70065. --- src/video_core/renderer_opengl/gl_device.cpp | 6 ++---- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 6afe6c1e1..9692b8e94 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -180,11 +180,9 @@ Device::Device() { LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); shader_backend = Settings::ShaderBackend::GLSL; } - // Completely disable async shaders for now, as it causes graphical glitches - use_asynchronous_shaders = false; // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. 
- // use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && - // !(is_amd || (is_intel && !is_linux)); + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() && + !(is_amd || (is_intel && !is_linux)); use_driver_cache = is_nvidia; LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2ce8b4156..57b163247 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -271,7 +271,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, - use_asynchronous_shaders{false}, + use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; -- cgit v1.2.3