diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 110 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 4 |
3 files changed, 96 insertions, 21 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 909ccb82c..0dbc4c02f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,7 +214,8 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn std::string source = "#version 430 core\n" "#extension GL_ARB_separate_shader_objects : enable\n" "#extension GL_NV_gpu_shader5 : enable\n" - "#extension GL_NV_shader_thread_group : enable\n"; + "#extension GL_NV_shader_thread_group : enable\n" + "#extension GL_NV_shader_thread_shuffle : enable\n"; if (entries.shader_viewport_layer_array) { source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 137b23740..76439e7ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -325,6 +325,7 @@ public: DeclareRegisters(); DeclarePredicates(); DeclareLocalMemory(); + DeclareSharedMemory(); DeclareInternalFlags(); DeclareInputAttributes(); DeclareOutputAttributes(); @@ -499,6 +500,13 @@ private: code.AddNewLine(); } + void DeclareSharedMemory() { + if (stage != ProgramType::Compute) { + return; + } + code.AddLine("shared uint {}[];", GetSharedMemory()); + } + void DeclareInternalFlags() { for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { const auto flag_code = static_cast<InternalFlag>(flag); @@ -881,6 +889,12 @@ private: Type::Uint}; } + if (const auto smem = std::get_if<SmemNode>(&*node)) { + return { + fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), + Type::Uint}; + } + if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; } @@ -1007,10 +1021,10 @@ private: return {std::move(temporary), value.GetType()}; } - Expression GetOutputAttribute(const AbufNode* abuf) { + std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) { switch (const auto attribute = abuf->GetIndex()) { case Attribute::Index::Position: - return {"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}; + return {{"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}}; case Attribute::Index::LayerViewportPointSize: switch (abuf->GetElement()) { case 0: @@ -1020,25 +1034,25 @@ private: if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { return {}; } - return {"gl_Layer", Type::Int}; + return {{"gl_Layer", Type::Int}}; case 2: if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { return {}; } - return {"gl_ViewportIndex", Type::Int}; + return {{"gl_ViewportIndex", Type::Int}}; case 3: UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); - return {"gl_PointSize", Type::Float}; + return {{"gl_PointSize", Type::Float}}; } return {}; case Attribute::Index::ClipDistances0123: - return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}; + return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}}; case Attribute::Index::ClipDistances4567: - return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}; + return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; default: if (IsGenericAttribute(attribute)) { - return {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), - Type::Float}; + return { + {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}}; } UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); return {}; @@ -1278,7 +1292,11 @@ private: target = {GetRegister(gpr->GetIndex()), Type::Float}; } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - target = GetOutputAttribute(abuf); + auto output = GetOutputAttribute(abuf); + if (!output) { + return {}; + } + target = std::move(*output); } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { if (stage == ProgramType::Compute) { LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); @@ -1286,6 +1304,11 @@ private: target = { fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), Type::Uint}; + } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { + ASSERT(stage == ProgramType::Compute); + target = { + fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), + Type::Uint}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { const std::string real = Visit(gmem->GetRealAddress()).AsUint(); const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); @@ -1934,8 +1957,7 @@ private: Expression BallotThread(Operation operation) { const std::string value = VisitOperand(operation, 0).AsBool(); if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia warp intrinsics are not available and its required by a shader"); + LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); // Stub on non-Nvidia devices by simulating all threads voting the same as the active // one. return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; @@ -1946,8 +1968,7 @@ private: Expression Vote(Operation operation, const char* func) { const std::string value = VisitOperand(operation, 0).AsBool(); if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia vote intrinsics are not available and its required by a shader"); + LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); // Stub with a warp size of one. return {value, Type::Bool}; } @@ -1964,15 +1985,54 @@ private: Expression VoteEqual(Operation operation) { if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia vote intrinsics are not available and its required by a shader"); - // We must return true here since a stub for a theoretical warp size of 1 will always - // return an equal result for all its votes. + LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); + // We must return true here since a stub for a theoretical warp size of 1. + // This will always return an equal result across all votes. return {"true", Type::Bool}; } return Vote(operation, "allThreadsEqualNV"); } + template <const std::string_view& func> + Expression Shuffle(Operation operation) { + const std::string value = VisitOperand(operation, 0).AsFloat(); + if (!device.HasWarpIntrinsics()) { + LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); + // On a "single-thread" device we are either on the same thread or out of bounds. Both + // cases return the passed value. + return {value, Type::Float}; + } + + const std::string index = VisitOperand(operation, 1).AsUint(); + const std::string width = VisitOperand(operation, 2).AsUint(); + return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; + } + + template <const std::string_view& func> + Expression InRangeShuffle(Operation operation) { + const std::string index = VisitOperand(operation, 0).AsUint(); + const std::string width = VisitOperand(operation, 1).AsUint(); + if (!device.HasWarpIntrinsics()) { + // On a "single-thread" device we are only in bounds when the requested index is 0. + return {fmt::format("({} == 0U)", index), Type::Bool}; + } + + const std::string in_range = code.GenerateTemporary(); + code.AddLine("bool {};", in_range); + code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); + return {in_range, Type::Bool}; + } + + struct Func final { + Func() = delete; + ~Func() = delete; + + static constexpr std::string_view ShuffleIndexed = "shuffleNV"; + static constexpr std::string_view ShuffleUp = "shuffleUpNV"; + static constexpr std::string_view ShuffleDown = "shuffleDownNV"; + static constexpr std::string_view ShuffleButterfly = "shuffleXorNV"; + }; + static constexpr std::array operation_decompilers = { &GLSLDecompiler::Assign, @@ -2135,6 +2195,16 @@ private: &GLSLDecompiler::VoteAll, &GLSLDecompiler::VoteAny, &GLSLDecompiler::VoteEqual, + + &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, + &GLSLDecompiler::Shuffle<Func::ShuffleUp>, + &GLSLDecompiler::Shuffle<Func::ShuffleDown>, + &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, + + &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); @@ -2175,6 +2245,10 @@ private: return "lmem_" + suffix; } + std::string GetSharedMemory() const { + return fmt::format("smem_{}", suffix); + } + std::string GetInternalFlag(InternalFlag flag) const { constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", "overflow_flag"}; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index ea77dd211..9ed738171 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -145,7 +145,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::None: return GL_LINEAR; case Tegra::Texture::TextureMipmapFilter::Nearest: - return GL_NEAREST_MIPMAP_LINEAR; + return GL_LINEAR_MIPMAP_NEAREST; case Tegra::Texture::TextureMipmapFilter::Linear: return GL_LINEAR_MIPMAP_LINEAR; } @@ -157,7 +157,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::Nearest: return GL_NEAREST_MIPMAP_NEAREST; case Tegra::Texture::TextureMipmapFilter::Linear: - return GL_LINEAR_MIPMAP_NEAREST; + return GL_NEAREST_MIPMAP_LINEAR; } } } |
