diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 50 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 20 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 153 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 473 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.h | 21 |
6 files changed, 274 insertions, 444 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a44bbfae8..9e93bd609 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -88,19 +88,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo state.texture_units[i].sampler = texture_samplers[i].sampler.handle; } - GLint ext_num; - glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); - for (GLint i = 0; i < ext_num; i++) { - const std::string_view extension{ - reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; - - if (extension == "GL_ARB_direct_state_access") { - has_ARB_direct_state_access = true; - } else if (extension == "GL_ARB_multi_bind") { - has_ARB_multi_bind = true; - } - } - OpenGLState::ApplyDefaultState(); // Create render framebuffer @@ -295,6 +282,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; u32 current_texture_bindpoint = 0; + std::array<bool, Maxwell::NumClipDistances> clip_distances{}; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; @@ -355,12 +343,22 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, current_texture_bindpoint); + // Workaround for Intel drivers. + // When a clip distance is enabled but not set in the shader it crops parts of the screen + // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the + // clip distances only when it's written by a shader stage. + for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) { + clip_distances[i] |= shader->GetShaderEntries().clip_distances[i]; + } + // When VertexA is enabled, we have dual vertex shaders if (program == Maxwell::ShaderProgram::VertexA) { // VertexB was combined with VertexA, so we skip the VertexB iteration index++; } } + + SyncClipEnabled(clip_distances); } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -443,7 +441,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us // TODO(bunnei): Figure out how the below register works. According to envytools, this should be // used to enable multiple render targets. However, it is left unset on all games that I have // tested. - ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented"); + UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0); // Bind the framebuffer surfaces current_state.draw.draw_framebuffer = framebuffer.handle; @@ -642,7 +640,6 @@ void RasterizerOpenGL::DrawArrays() { SyncCullMode(); SyncPrimitiveRestart(); SyncScissorTest(state); - SyncClipEnabled(); // Alpha Testing is synced on shaders. SyncTransformFeedback(); SyncPointState(); @@ -1019,20 +1016,23 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; } -void RasterizerOpenGL::SyncClipEnabled() { +void RasterizerOpenGL::SyncClipEnabled( + const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { + const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - state.clip_distance[0] = regs.clip_distance_enabled.c0 != 0; - state.clip_distance[1] = regs.clip_distance_enabled.c1 != 0; - state.clip_distance[2] = regs.clip_distance_enabled.c2 != 0; - state.clip_distance[3] = regs.clip_distance_enabled.c3 != 0; - state.clip_distance[4] = regs.clip_distance_enabled.c4 != 0; - state.clip_distance[5] = regs.clip_distance_enabled.c5 != 0; - state.clip_distance[6] = regs.clip_distance_enabled.c6 != 0; - state.clip_distance[7] = regs.clip_distance_enabled.c7 != 0; + const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ + regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, + regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, + regs.clip_distance_enabled.c4 != 0, regs.clip_distance_enabled.c5 != 0, + regs.clip_distance_enabled.c6 != 0, regs.clip_distance_enabled.c7 != 0}; + + for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) { + state.clip_distance[i] = reg_state[i] && clip_mask[i]; + } } void RasterizerOpenGL::SyncClipCoef() { - UNREACHABLE(); + UNIMPLEMENTED(); } void RasterizerOpenGL::SyncCullMode() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 7ec9746b1..988fa3e27 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -60,20 +60,6 @@ public: bool AccelerateDrawBatch(bool is_indexed) override; void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; - /// OpenGL shader generated for a given Maxwell register state - struct MaxwellShader { - /// OpenGL shader resource - OGLProgram shader; - }; - - struct VertexShader { - OGLShader shader; - }; - - struct FragmentShader { - OGLShader shader; - }; - /// Maximum supported size that a constbuffer can have in bytes. static constexpr std::size_t MaxConstbufferSize = 0x10000; static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, @@ -142,7 +128,8 @@ private: void SyncViewport(OpenGLState& current_state); /// Syncs the clip enabled status to match the guest state - void SyncClipEnabled(); + void SyncClipEnabled( + const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask); /// Syncs the clip coefficients to match the guest state void SyncClipCoef(); @@ -193,9 +180,6 @@ private: /// but are needed for correct emulation void CheckExtensions(); - bool has_ARB_direct_state_access = false; - bool has_ARB_multi_bind = false; - OpenGLState state; RasterizerCacheOpenGL res_cache; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index dde2f468d..5f4cdd119 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -405,138 +405,6 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, } } -MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64)); -static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, - GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0, - GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { - MICROPROFILE_SCOPE(OpenGL_BlitSurface); - - const auto& src_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; - - OpenGLState prev_state{OpenGLState::GetCurState()}; - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.draw.read_framebuffer = read_fb_handle; - state.draw.draw_framebuffer = draw_fb_handle; - // Set sRGB enabled if the destination surfaces need it - state.framebuffer_srgb.enabled = dst_params.srgb_conversion; - state.ApplyFramebufferState(); - - u32 buffers{}; - - if (src_params.type == SurfaceType::ColorTexture) { - switch (src_params.target) { - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - case SurfaceTarget::TextureCubemap: - glFramebufferTexture2D( - GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), - src_surface->Texture().handle, 0); - glFramebufferTexture2D( - GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); - break; - case SurfaceTarget::Texture2DArray: - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - src_surface->Texture().handle, 0, 0); - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); - break; - case SurfaceTarget::Texture3D: - glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - SurfaceTargetToGL(src_params.target), - src_surface->Texture().handle, 0, 0); - glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - SurfaceTargetToGL(src_params.target), 0, 0, 0); - break; - default: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - } - - switch (dst_params.target) { - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - case SurfaceTarget::TextureCubemap: - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), - dst_surface->Texture().handle, 0); - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); - break; - case SurfaceTarget::Texture2DArray: - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - dst_surface->Texture().handle, 0, 0); - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); - break; - - case SurfaceTarget::Texture3D: - glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - SurfaceTargetToGL(dst_params.target), - dst_surface->Texture().handle, 0, 0); - glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - SurfaceTargetToGL(dst_params.target), 0, 0, 0); - break; - default: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - } - - buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - src_surface->Texture().handle, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->Texture().handle, 0); - - buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - } - - const auto& rect{src_params.GetRect()}; - glBlitFramebuffer(rect.left, rect.bottom, rect.right, rect.top, rect.left, rect.bottom, - rect.right, rect.top, buffers, - buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); - - return true; -} - static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) { const auto& src_params{src_surface->GetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()}; @@ -841,9 +709,10 @@ void CachedSurface::LoadGLBuffer() { const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; gl_buffer[0].assign(texture_src_data, texture_src_data_end); } - for (u32 i = 0; i < params.max_mip_level; i++) + for (u32 i = 0; i < params.max_mip_level; i++) { ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), params.MipHeight(i), params.MipDepth(i)); + } } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); @@ -1163,7 +1032,10 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface) { const auto& src_params{src_surface->GetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()}; - FlushRegion(src_params.addr, dst_params.MemorySize()); + + // Flush enough memory for both the source and destination surface + FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize())); + LoadSurface(dst_surface); } @@ -1189,20 +1061,9 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, return new_surface; } - // If the format is the same, just do a framebuffer blit. This is significantly faster than - // using PBOs. The is also likely less accurate, as textures will be converted rather than - // reinterpreted. When use_accurate_gpu_emulation setting is enabled, perform a more accurate - // surface copy, where pixels are reinterpreted as a new format (without conversion). This - // code path uses OpenGL PBOs and is quite slow. - const bool is_blit{old_params.pixel_format == new_params.pixel_format}; - switch (new_params.target) { case SurfaceTarget::Texture2D: - if (is_blit) { - BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle); - } else { - CopySurface(old_surface, new_surface, copy_pbo.handle); - } + CopySurface(old_surface, new_surface, copy_pbo.handle); break; case SurfaceTarget::Texture3D: AccurateCopySurface(old_surface, new_surface); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0c4524d5c..d235bfcd4 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -201,14 +201,53 @@ private: } }; +template <typename T> +class ShaderScopedScope { +public: + explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr) + : writer(writer), end_expr(std::move(end_expr)) { + + if (begin_expr.empty()) { + writer.AddLine('{'); + } else { + writer.AddExpression(begin_expr); + writer.AddLine(" {"); + } + ++writer.scope; + } + + ShaderScopedScope(const ShaderScopedScope&) = delete; + + ~ShaderScopedScope() { + --writer.scope; + if (end_expr.empty()) { + writer.AddLine('}'); + } else { + writer.AddExpression("} "); + writer.AddExpression(end_expr); + writer.AddLine(';'); + } + } + + ShaderScopedScope& operator=(const ShaderScopedScope&) = delete; + +private: + T& writer; + std::string end_expr; +}; + class ShaderWriter { public: - void AddLine(std::string_view text) { + void AddExpression(std::string_view text) { DEBUG_ASSERT(scope >= 0); if (!text.empty()) { AppendIndentation(); } shader_source += text; + } + + void AddLine(std::string_view text) { + AddExpression(text); AddNewLine(); } @@ -228,6 +267,11 @@ public: return std::move(shader_source); } + ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {}, + std::string end_expr = {}) { + return ShaderScopedScope(*this, begin_expr, end_expr); + } + int scope = 0; private: @@ -311,7 +355,7 @@ public: // Default - do nothing return value; default: - UNIMPLEMENTED_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); + UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); } } @@ -525,6 +569,7 @@ public: ((header.vtg.clip_distances >> index) & 1) == 0, "Shader is setting gl_ClipDistance{} without enabling it in the header", index); + clip_distances[index] = true; fixed_pipeline_output_attributes_used.insert(attribute); shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';'); break; @@ -602,6 +647,11 @@ public: return used_samplers; } + /// Returns an array of the used clip distances. + const std::array<bool, Maxwell::NumClipDistances>& GetClipDistances() const { + return clip_distances; + } + /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if /// necessary. std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, @@ -810,14 +860,12 @@ private: } if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); + // This avoids optimizations of constant propagation and keeps the code as the original // Sadly using the precise keyword causes "linking" errors on fragment shaders. shader.AddLine("precise float tmp = " + src + ';'); shader.AddLine(dest + " = tmp;"); - --shader.scope; - shader.AddLine('}'); } else { shader.AddLine(dest + " = " + src + ';'); } @@ -975,6 +1023,7 @@ private: const std::string& suffix; const Tegra::Shader::Header& header; std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used; + std::array<bool, Maxwell::NumClipDistances> clip_distances{}; u64 local_memory_size; }; @@ -997,7 +1046,8 @@ public: /// Returns entries in the shader that are useful for external functions ShaderEntries GetEntries() const { - return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), shader_length}; + return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), regs.GetClipDistances(), + shader_length}; } private: @@ -1293,15 +1343,7 @@ private: regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); } - void WriteTexsInstruction(const Instruction& instr, const std::string& coord, - const std::string& texture) { - // Add an extra scope and declare the texture coords inside to prevent - // overwriting them in case they are used as outputs of the texs instruction. - shader.AddLine('{'); - ++shader.scope; - shader.AddLine(coord); - shader.AddLine("vec4 texture_tmp = " + texture + ';'); - + void WriteTexsInstruction(const Instruction& instr, const std::string& texture) { // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 @@ -1313,19 +1355,17 @@ private: if (written_components < 2) { // Write the first two swizzle components to gpr0 and gpr0+1 - regs.SetRegisterToFloat(instr.gpr0, component, "texture_tmp", 1, 4, false, + regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, written_components % 2); } else { ASSERT(instr.texs.HasTwoDestinations()); // Write the rest of the swizzle components to gpr28 and gpr28+1 - regs.SetRegisterToFloat(instr.gpr28, component, "texture_tmp", 1, 4, false, + regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, written_components % 2); } ++written_components; } - --shader.scope; - shader.AddLine('}'); } static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { @@ -1348,12 +1388,10 @@ private: * top. */ void EmitPushToFlowStack(u32 target) { - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); + shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;"); shader.AddLine("flow_stack_top++;"); - --shader.scope; - shader.AddLine('}'); } /* @@ -1361,13 +1399,11 @@ private: * popped address and decrementing the stack top. */ void EmitPopFromFlowStack() { - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); + shader.AddLine("flow_stack_top--;"); shader.AddLine("jmp_to = flow_stack[flow_stack_top];"); shader.AddLine("break;"); - --shader.scope; - shader.AddLine('}'); } /// Writes the output values from a fragment shader to the corresponding GLSL output variables. @@ -2279,8 +2315,7 @@ private: UNIMPLEMENTED_IF(instr.conversion.selector); UNIMPLEMENTED_IF_MSG(instr.generates_cc, "Condition codes generation in I2F is not implemented"); - - std::string op_a{}; + std::string op_a; if (instr.is_b_gpr) { op_a = @@ -2436,10 +2471,7 @@ private: case OpCode::Id::LD_C: { UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); - // Add an extra scope and declare the index register inside to prevent - // overwriting it in case it is used as an output of the LD instruction. - shader.AddLine("{"); - ++shader.scope; + const auto scope = shader.Scope(); shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);"); @@ -2465,19 +2497,13 @@ private: UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value())); } - - --shader.scope; - shader.AddLine("}"); break; } case OpCode::Id::LD_L: { UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", static_cast<unsigned>(instr.ld_l.unknown.Value())); - // Add an extra scope and declare the index register inside to prevent - // overwriting it in case it is used as an output of the LD instruction. - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + std::to_string(instr.smem_imm.Value()) + ')'; @@ -2494,9 +2520,6 @@ private: UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", static_cast<unsigned>(instr.ldst_sl.type.Value())); } - - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::ST_A: { @@ -2531,10 +2554,7 @@ private: UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", static_cast<unsigned>(instr.st_l.unknown.Value())); - // Add an extra scope and declare the index register inside to prevent - // overwriting it in case it is used as an output of the LD instruction. - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + std::to_string(instr.smem_imm.Value()) + ')'; @@ -2549,14 +2569,10 @@ private: UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", static_cast<unsigned>(instr.ldst_sl.type.Value())); } - - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TEX: { Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; - std::string coord; const bool is_array = instr.tex.array != 0; UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), @@ -2589,21 +2605,23 @@ private: bool depth_compare_extra = false; + const auto scope = shader.Scope(); + switch (num_coordinates) { case 1: { const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index); if (is_array) { if (depth_compare) { - coord = "vec3 coords = vec3(" + x + ", " + depth_value + ", " + - array_elem + ");"; + shader.AddLine("vec3 coords = vec3(" + x + ", " + depth_value + ", " + + array_elem + ");"); } else { - coord = "vec2 coords = vec2(" + x + ", " + array_elem + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + array_elem + ");"); } } else { if (depth_compare) { - coord = "vec2 coords = vec2(" + x + ", " + depth_value + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + depth_value + ");"); } else { - coord = "float coords = " + x + ';'; + shader.AddLine("float coords = " + x + ';'); } } break; @@ -2614,17 +2632,18 @@ private: regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1); if (is_array) { if (depth_compare) { - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + depth_value + - ", " + array_elem + ");"; + shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + + depth_value + ", " + array_elem + ");"); } else { - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + array_elem + ");"; + shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + + array_elem + ");"); } } else { if (depth_compare) { - coord = - "vec3 coords = vec3(" + x + ", " + y + ", " + depth_value + ");"; + shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + + depth_value + ");"); } else { - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); } } break; @@ -2637,14 +2656,14 @@ private: regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2); if (is_array) { depth_compare_extra = depth_compare; - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + - array_elem + ");"; + shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + + array_elem + ");"); } else { if (depth_compare) { - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + - depth_value + ");"; + shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + + depth_value + ");"); } else { - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"); } } break; @@ -2656,7 +2675,7 @@ private: // Fallback to interpreting as a 2D texture for now const std::string x = regs.GetRegisterAsFloat(instr.gpr8); const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); texture_type = Tegra::Shader::TextureType::Texture2D; } @@ -2665,79 +2684,61 @@ private: // Add an extra scope and declare the texture coords inside to prevent // overwriting them in case they are used as outputs of the texs instruction. - shader.AddLine('{'); - ++shader.scope; - shader.AddLine(coord); - std::string texture; - - switch (instr.tex.GetTextureProcessMode()) { - case Tegra::Shader::TextureProcessMode::None: { - if (!depth_compare_extra) { - texture = "texture(" + sampler + ", coords)"; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - } - break; - } - case Tegra::Shader::TextureProcessMode::LZ: { - if (!depth_compare_extra) { - texture = "textureLod(" + sampler + ", coords, 0.0)"; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - } - break; - } - case Tegra::Shader::TextureProcessMode::LB: - case Tegra::Shader::TextureProcessMode::LBA: { - // TODO: Figure if A suffix changes the equation at all. - if (!depth_compare_extra) { - texture = "texture(" + sampler + ", coords, " + lod_value + ')'; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - LOG_WARNING(HW_GPU, - "OpenGL Limitation: can't set bias value along depth compare"); - } - break; - } - case Tegra::Shader::TextureProcessMode::LL: - case Tegra::Shader::TextureProcessMode::LLA: { - // TODO: Figure if A suffix changes the equation at all. - if (!depth_compare_extra) { - texture = "textureLod(" + sampler + ", coords, " + lod_value + ')'; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; - LOG_WARNING(HW_GPU, - "OpenGL Limitation: can't set lod value along depth compare"); - } - break; - } - default: { - if (!depth_compare_extra) { - texture = "texture(" + sampler + ", coords)"; - } else { - texture = "texture(" + sampler + ", coords, " + depth_value + ')'; + const std::string texture = [&]() { + switch (instr.tex.GetTextureProcessMode()) { + case Tegra::Shader::TextureProcessMode::None: + if (depth_compare_extra) { + return "texture(" + sampler + ", coords, " + depth_value + ')'; + } + return "texture(" + sampler + ", coords)"; + case Tegra::Shader::TextureProcessMode::LZ: + if (depth_compare_extra) { + return "texture(" + sampler + ", coords, " + depth_value + ')'; + } + return "textureLod(" + sampler + ", coords, 0.0)"; + case Tegra::Shader::TextureProcessMode::LB: + case Tegra::Shader::TextureProcessMode::LBA: + // TODO: Figure if A suffix changes the equation at all. + if (depth_compare_extra) { + LOG_WARNING( + HW_GPU, + "OpenGL Limitation: can't set bias value along depth compare"); + return "texture(" + sampler + ", coords, " + depth_value + ')'; + } + return "texture(" + sampler + ", coords, " + lod_value + ')'; + case Tegra::Shader::TextureProcessMode::LL: + case Tegra::Shader::TextureProcessMode::LLA: + // TODO: Figure if A suffix changes the equation at all. + if (depth_compare_extra) { + LOG_WARNING( + HW_GPU, + "OpenGL Limitation: can't set lod value along depth compare"); + return "texture(" + sampler + ", coords, " + depth_value + ')'; + } + return "textureLod(" + sampler + ", coords, " + lod_value + ')'; + default: + UNIMPLEMENTED_MSG("Unhandled texture process mode {}", + static_cast<u32>(instr.tex.GetTextureProcessMode())); + if (depth_compare_extra) { + return "texture(" + sampler + ", coords, " + depth_value + ')'; + } + return "texture(" + sampler + ", coords)"; } - UNIMPLEMENTED_MSG("Unhandled texture process mode {}", - static_cast<u32>(instr.tex.GetTextureProcessMode())); - } - } - if (!depth_compare) { - shader.AddLine("vec4 texture_tmp = " + texture + ';'); + }(); + + if (depth_compare) { + regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); + } else { std::size_t dest_elem{}; for (std::size_t elem = 0; elem < 4; ++elem) { if (!instr.tex.IsComponentEnabled(elem)) { // Skip disabled components continue; } - regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, - dest_elem); + regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); ++dest_elem; } - } else { - regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); } - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TEXS: { @@ -2747,26 +2748,28 @@ private: UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); + const auto scope = shader.Scope(); + const bool depth_compare = instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); u32 num_coordinates = TextureCoordinates(texture_type); const auto process_mode = instr.texs.GetTextureProcessMode(); - std::string lod_value; - std::string coord; u32 lod_offset = 0; if (process_mode == Tegra::Shader::TextureProcessMode::LL) { if (num_coordinates > 2) { - lod_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); + shader.AddLine("float lod_value = " + + regs.GetRegisterAsFloat(instr.gpr20.Value() + 1) + ';'); lod_offset = 2; } else { - lod_value = regs.GetRegisterAsFloat(instr.gpr20); + shader.AddLine("float lod_value = " + regs.GetRegisterAsFloat(instr.gpr20) + + ';'); lod_offset = 1; } } switch (num_coordinates) { case 1: { - coord = "float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';'; + shader.AddLine("float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';'); break; } case 2: { @@ -2776,13 +2779,14 @@ private: const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); const std::string y = regs.GetRegisterAsFloat(instr.gpr20); const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1); - coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + - ");"; + shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + + index + ");"); } else { const std::string index = regs.GetRegisterAsInteger(instr.gpr8); const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; + shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + index + + ");"); } } else { if (lod_offset != 0) { @@ -2792,12 +2796,13 @@ private: regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + + ");"); } else { const std::string x = regs.GetRegisterAsFloat(instr.gpr8); const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); } } else { if (depth_compare) { @@ -2805,11 +2810,12 @@ private: const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); const std::string z = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + + ");"); } else { const std::string x = regs.GetRegisterAsFloat(instr.gpr8); const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); } } } @@ -2819,7 +2825,7 @@ private: const std::string x = regs.GetRegisterAsFloat(instr.gpr8); const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); const std::string z = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"); break; } default: @@ -2829,42 +2835,37 @@ private: // Fallback to interpreting as a 2D texture for now const std::string x = regs.GetRegisterAsFloat(instr.gpr8); const std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); texture_type = Tegra::Shader::TextureType::Texture2D; is_array = false; } const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); - std::string texture; - switch (process_mode) { - case Tegra::Shader::TextureProcessMode::None: { - texture = "texture(" + sampler + ", coords)"; - break; - } - case Tegra::Shader::TextureProcessMode::LZ: { - if (depth_compare && is_array) { - texture = "texture(" + sampler + ", coords)"; - } else { - texture = "textureLod(" + sampler + ", coords, 0.0)"; + + std::string texture = [&]() { + switch (process_mode) { + case Tegra::Shader::TextureProcessMode::None: + return "texture(" + sampler + ", coords)"; + case Tegra::Shader::TextureProcessMode::LZ: + if (depth_compare && is_array) { + return "texture(" + sampler + ", coords)"; + } else { + return "textureLod(" + sampler + ", coords, 0.0)"; + } + break; + case Tegra::Shader::TextureProcessMode::LL: + return "textureLod(" + sampler + ", coords, lod_value)"; + default: + UNIMPLEMENTED_MSG("Unhandled texture process mode {}", + static_cast<u32>(instr.texs.GetTextureProcessMode())); + return "texture(" + sampler + ", coords)"; } - break; - } - case Tegra::Shader::TextureProcessMode::LL: { - texture = "textureLod(" + sampler + ", coords, " + lod_value + ')'; - break; - } - default: { - texture = "texture(" + sampler + ", coords)"; - UNIMPLEMENTED_MSG("Unhandled texture process mode {}", - static_cast<u32>(instr.texs.GetTextureProcessMode())); - } - } - if (!depth_compare) { - WriteTexsInstruction(instr, coord, texture); - } else { - WriteTexsInstruction(instr, coord, "vec4(" + texture + ')'); + }(); + if (depth_compare) { + texture = "vec4(" + texture + ')'; } + WriteTexsInstruction(instr, texture); break; } case OpCode::Id::TLDS: { @@ -2883,15 +2884,12 @@ private: u32 extra_op_offset = 0; - // Scope to avoid variable name overlaps. - shader.AddLine('{'); - ++shader.scope; - std::string coords; + ShaderScopedScope scope = shader.Scope(); switch (texture_type) { case Tegra::Shader::TextureType::Texture1D: { const std::string x = regs.GetRegisterAsInteger(instr.gpr8); - coords = "float coords = " + x + ';'; + shader.AddLine("float coords = " + x + ';'); break; } case Tegra::Shader::TextureType::Texture2D: { @@ -2900,7 +2898,7 @@ private: const std::string x = regs.GetRegisterAsInteger(instr.gpr8); const std::string y = regs.GetRegisterAsInteger(instr.gpr20); // shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");"); - coords = "ivec2 coords = ivec2(" + x + ", " + y + ");"; + shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");"); extra_op_offset = 1; break; } @@ -2909,35 +2907,29 @@ private: } const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false); - std::string texture = "texelFetch(" + sampler + ", coords, 0)"; - switch (instr.tlds.GetTextureProcessMode()) { - case Tegra::Shader::TextureProcessMode::LZ: { - texture = "texelFetch(" + sampler + ", coords, 0)"; - break; - } - case Tegra::Shader::TextureProcessMode::LL: { - shader.AddLine( - "float lod = " + - regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';'); - texture = "texelFetch(" + sampler + ", coords, lod)"; - break; - } - default: { - texture = "texelFetch(" + sampler + ", coords, 0)"; - UNIMPLEMENTED_MSG("Unhandled texture process mode {}", - static_cast<u32>(instr.tlds.GetTextureProcessMode())); - } - } - WriteTexsInstruction(instr, coords, texture); - --shader.scope; - shader.AddLine('}'); + const std::string texture = [&]() { + switch (instr.tlds.GetTextureProcessMode()) { + case Tegra::Shader::TextureProcessMode::LZ: + return "texelFetch(" + sampler + ", coords, 0)"; + case Tegra::Shader::TextureProcessMode::LL: + shader.AddLine( + "float lod = " + + regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';'); + return "texelFetch(" + sampler + ", coords, lod)"; + default: + UNIMPLEMENTED_MSG("Unhandled texture process mode {}", + static_cast<u32>(instr.tlds.GetTextureProcessMode())); + return "texelFetch(" + sampler + ", coords, 0)"; + } + }(); + + WriteTexsInstruction(instr, texture); break; } case OpCode::Id::TLD4: { ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D); ASSERT(instr.tld4.array == 0); - std::string coord; UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); @@ -2954,10 +2946,7 @@ private: if (depth_compare) num_coordinates += 1; - // Add an extra scope and declare the texture coords inside to prevent - // overwriting them in case they are used as outputs of the texs instruction. - shader.AddLine('{'); - ++shader.scope; + const auto scope = shader.Scope(); switch (num_coordinates) { case 2: { @@ -2988,23 +2977,19 @@ private: const std::string texture = "textureGather(" + sampler + ", coords, " + std::to_string(instr.tld4.component) + ')'; - if (!depth_compare) { - shader.AddLine("vec4 texture_tmp = " + texture + ';'); + if (depth_compare) { + regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); + } else { std::size_t dest_elem{}; for (std::size_t elem = 0; elem < 4; ++elem) { if (!instr.tex.IsComponentEnabled(elem)) { // Skip disabled components continue; } - regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, - dest_elem); + regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); ++dest_elem; } - } else { - regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false); } - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TLD4S: { @@ -3015,10 +3000,7 @@ private: instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), "AOFFI is not implemented"); - // Scope to avoid variable name overlaps. - shader.AddLine('{'); - ++shader.scope; - std::string coords; + const auto scope = shader.Scope(); const bool depth_compare = instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); @@ -3027,33 +3009,29 @@ private: // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. const std::string sampler = GetSampler( instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); - if (!depth_compare) { - coords = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; - } else { + if (depth_compare) { // Note: TLD4S coordinate encoding works just like TEXS's const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coords = "vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");"; - } - const std::string texture = "textureGather(" + sampler + ", coords, " + - std::to_string(instr.tld4s.component) + ')'; - - if (!depth_compare) { - WriteTexsInstruction(instr, coords, texture); + shader.AddLine("vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");"); } else { - WriteTexsInstruction(instr, coords, "vec4(" + texture + ')'); + shader.AddLine("vec2 coords = vec2(" + op_a + ", " + op_b + ");"); } - --shader.scope; - shader.AddLine('}'); + std::string texture = "textureGather(" + sampler + ", coords, " + + std::to_string(instr.tld4s.component) + ')'; + if (depth_compare) { + texture = "vec4(" + texture + ')'; + } + WriteTexsInstruction(instr, texture); break; } case OpCode::Id::TXQ: { UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); - ++shader.scope; - shader.AddLine('{'); - // TODO: the new commits on the texture refactor, change the way samplers work. + const auto scope = shader.Scope(); + + // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. const std::string sampler = @@ -3064,7 +3042,8 @@ private: regs.GetRegisterAsInteger(instr.gpr8) + ')'; const std::string mip_level = "textureQueryLevels(" + sampler + ')'; shader.AddLine("ivec2 sizes = " + texture + ';'); - regs.SetRegisterToInteger(instr.gpr0, true, 0, "sizes.x", 1, 1); + + regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1); @@ -3075,8 +3054,6 @@ private: static_cast<u32>(instr.txq.query_type.Value())); } } - --shader.scope; - shader.AddLine('}'); break; } case OpCode::Id::TMML: { @@ -3091,17 +3068,18 @@ private: const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false); - // TODO: add coordinates for different samplers once other texture types are + const auto scope = shader.Scope(); + + // TODO: Add coordinates for different samplers once other texture types are // implemented. - std::string coord; switch (texture_type) { case Tegra::Shader::TextureType::Texture1D: { - coord = "float coords = " + x + ';'; + shader.AddLine("float coords = " + x + ';'); break; } case Tegra::Shader::TextureType::Texture2D: { const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); break; } default: @@ -3109,22 +3087,15 @@ private: // Fallback to interpreting as a 2D texture for now const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); texture_type = Tegra::Shader::TextureType::Texture2D; } - // Add an extra scope and declare the texture coords inside to prevent - // overwriting them in case they are used as outputs of the texs instruction. - shader.AddLine('{'); - ++shader.scope; - shader.AddLine(coord); + const std::string texture = "textureQueryLod(" + sampler + ", coords)"; - const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);"; - shader.AddLine(tmp); + shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);"); regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1); regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1); - --shader.scope; - shader.AddLine('}'); break; } default: { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index b425d98ae..4fa6d7612 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -163,6 +163,7 @@ private: struct ShaderEntries { std::vector<ConstBufferEntry> const_buffer_entries; std::vector<SamplerEntry> texture_samplers; + std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances; std::size_t shader_length; }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index b757f5f44..4970aafed 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -60,6 +60,17 @@ public: } void ApplyTo(OpenGLState& state) { + UpdatePipeline(); + state.draw.shader_program = 0; + state.draw.program_pipeline = pipeline.handle; + state.geometry_shaders.enabled = (gs != 0); + } + +private: + void UpdatePipeline() { + // Avoid updating the pipeline when values have no changed + if (old_vs == vs && old_fs == fs && old_gs == gs) + return; // Workaround for AMD bug glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, @@ -68,14 +79,16 @@ public: glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs); glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs); glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs); - state.draw.shader_program = 0; - state.draw.program_pipeline = pipeline.handle; - state.geometry_shaders.enabled = (gs != 0); + + // Update the old values + old_vs = vs; + old_fs = fs; + old_gs = gs; } -private: OGLPipeline pipeline; GLuint vs{}, fs{}, gs{}; + GLuint old_vs{}, old_fs{}, old_gs{}; }; } // namespace OpenGL::GLShader |
