aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp50
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h20
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp153
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp473
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h21
6 files changed, 274 insertions, 444 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a44bbfae8..9e93bd609 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -88,19 +88,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
}
- GLint ext_num;
- glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
- for (GLint i = 0; i < ext_num; i++) {
- const std::string_view extension{
- reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))};
-
- if (extension == "GL_ARB_direct_state_access") {
- has_ARB_direct_state_access = true;
- } else if (extension == "GL_ARB_multi_bind") {
- has_ARB_multi_bind = true;
- }
- }
-
OpenGLState::ApplyDefaultState();
// Create render framebuffer
@@ -295,6 +282,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
+ std::array<bool, Maxwell::NumClipDistances> clip_distances{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
@@ -355,12 +343,22 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
primitive_mode, current_texture_bindpoint);
+ // Workaround for Intel drivers.
+ // When a clip distance is enabled but not set in the shader it crops parts of the screen
+ // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
+ // clip distances only when it's written by a shader stage.
+ for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) {
+ clip_distances[i] |= shader->GetShaderEntries().clip_distances[i];
+ }
+
// When VertexA is enabled, we have dual vertex shaders
if (program == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
index++;
}
}
+
+ SyncClipEnabled(clip_distances);
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -443,7 +441,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
// TODO(bunnei): Figure out how the below register works. According to envytools, this should be
// used to enable multiple render targets. However, it is left unset on all games that I have
// tested.
- ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
+ UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0);
// Bind the framebuffer surfaces
current_state.draw.draw_framebuffer = framebuffer.handle;
@@ -642,7 +640,6 @@ void RasterizerOpenGL::DrawArrays() {
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest(state);
- SyncClipEnabled();
// Alpha Testing is synced on shaders.
SyncTransformFeedback();
SyncPointState();
@@ -1019,20 +1016,23 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
}
-void RasterizerOpenGL::SyncClipEnabled() {
+void RasterizerOpenGL::SyncClipEnabled(
+ const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) {
+
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- state.clip_distance[0] = regs.clip_distance_enabled.c0 != 0;
- state.clip_distance[1] = regs.clip_distance_enabled.c1 != 0;
- state.clip_distance[2] = regs.clip_distance_enabled.c2 != 0;
- state.clip_distance[3] = regs.clip_distance_enabled.c3 != 0;
- state.clip_distance[4] = regs.clip_distance_enabled.c4 != 0;
- state.clip_distance[5] = regs.clip_distance_enabled.c5 != 0;
- state.clip_distance[6] = regs.clip_distance_enabled.c6 != 0;
- state.clip_distance[7] = regs.clip_distance_enabled.c7 != 0;
+ const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{
+ regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0,
+ regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0,
+ regs.clip_distance_enabled.c4 != 0, regs.clip_distance_enabled.c5 != 0,
+ regs.clip_distance_enabled.c6 != 0, regs.clip_distance_enabled.c7 != 0};
+
+ for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) {
+ state.clip_distance[i] = reg_state[i] && clip_mask[i];
+ }
}
void RasterizerOpenGL::SyncClipCoef() {
- UNREACHABLE();
+ UNIMPLEMENTED();
}
void RasterizerOpenGL::SyncCullMode() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 7ec9746b1..988fa3e27 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -60,20 +60,6 @@ public:
bool AccelerateDrawBatch(bool is_indexed) override;
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
- /// OpenGL shader generated for a given Maxwell register state
- struct MaxwellShader {
- /// OpenGL shader resource
- OGLProgram shader;
- };
-
- struct VertexShader {
- OGLShader shader;
- };
-
- struct FragmentShader {
- OGLShader shader;
- };
-
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
@@ -142,7 +128,8 @@ private:
void SyncViewport(OpenGLState& current_state);
/// Syncs the clip enabled status to match the guest state
- void SyncClipEnabled();
+ void SyncClipEnabled(
+ const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask);
/// Syncs the clip coefficients to match the guest state
void SyncClipCoef();
@@ -193,9 +180,6 @@ private:
/// but are needed for correct emulation
void CheckExtensions();
- bool has_ARB_direct_state_access = false;
- bool has_ARB_multi_bind = false;
-
OpenGLState state;
RasterizerCacheOpenGL res_cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index dde2f468d..5f4cdd119 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -405,138 +405,6 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
}
}
-MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64));
-static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
- GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0,
- GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
- MICROPROFILE_SCOPE(OpenGL_BlitSurface);
-
- const auto& src_params{src_surface->GetSurfaceParams()};
- const auto& dst_params{dst_surface->GetSurfaceParams()};
-
- OpenGLState prev_state{OpenGLState::GetCurState()};
- SCOPE_EXIT({ prev_state.Apply(); });
-
- OpenGLState state;
- state.draw.read_framebuffer = read_fb_handle;
- state.draw.draw_framebuffer = draw_fb_handle;
- // Set sRGB enabled if the destination surfaces need it
- state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
- state.ApplyFramebufferState();
-
- u32 buffers{};
-
- if (src_params.type == SurfaceType::ColorTexture) {
- switch (src_params.target) {
- case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- GL_TEXTURE_2D, src_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- 0, 0);
- break;
- case SurfaceTarget::TextureCubemap:
- glFramebufferTexture2D(
- GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
- src_surface->Texture().handle, 0);
- glFramebufferTexture2D(
- GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
- static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
- break;
- case SurfaceTarget::Texture2DArray:
- glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- src_surface->Texture().handle, 0, 0);
- glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
- break;
- case SurfaceTarget::Texture3D:
- glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- SurfaceTargetToGL(src_params.target),
- src_surface->Texture().handle, 0, 0);
- glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
- SurfaceTargetToGL(src_params.target), 0, 0, 0);
- break;
- default:
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- GL_TEXTURE_2D, src_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- 0, 0);
- break;
- }
-
- switch (dst_params.target) {
- case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- 0, 0);
- break;
- case SurfaceTarget::TextureCubemap:
- glFramebufferTexture2D(
- GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
- dst_surface->Texture().handle, 0);
- glFramebufferTexture2D(
- GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
- static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
- break;
- case SurfaceTarget::Texture2DArray:
- glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- dst_surface->Texture().handle, 0, 0);
- glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
- break;
-
- case SurfaceTarget::Texture3D:
- glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- SurfaceTargetToGL(dst_params.target),
- dst_surface->Texture().handle, 0, 0);
- glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
- SurfaceTargetToGL(dst_params.target), 0, 0, 0);
- break;
- default:
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- 0, 0);
- break;
- }
-
- buffers = GL_COLOR_BUFFER_BIT;
- } else if (src_params.type == SurfaceType::Depth) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- src_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- dst_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-
- buffers = GL_DEPTH_BUFFER_BIT;
- } else if (src_params.type == SurfaceType::DepthStencil) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- src_surface->Texture().handle, 0);
-
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- dst_surface->Texture().handle, 0);
-
- buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
- }
-
- const auto& rect{src_params.GetRect()};
- glBlitFramebuffer(rect.left, rect.bottom, rect.right, rect.top, rect.left, rect.bottom,
- rect.right, rect.top, buffers,
- buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
-
- return true;
-}
-
static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) {
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
@@ -841,9 +709,10 @@ void CachedSurface::LoadGLBuffer() {
const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
gl_buffer[0].assign(texture_src_data, texture_src_data_end);
}
- for (u32 i = 0; i < params.max_mip_level; i++)
+ for (u32 i = 0; i < params.max_mip_level; i++) {
ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
params.MipHeight(i), params.MipDepth(i));
+ }
}
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
@@ -1163,7 +1032,10 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
const Surface& dst_surface) {
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
- FlushRegion(src_params.addr, dst_params.MemorySize());
+
+ // Flush enough memory for both the source and destination surface
+ FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize()));
+
LoadSurface(dst_surface);
}
@@ -1189,20 +1061,9 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
return new_surface;
}
- // If the format is the same, just do a framebuffer blit. This is significantly faster than
- // using PBOs. The is also likely less accurate, as textures will be converted rather than
- // reinterpreted. When use_accurate_gpu_emulation setting is enabled, perform a more accurate
- // surface copy, where pixels are reinterpreted as a new format (without conversion). This
- // code path uses OpenGL PBOs and is quite slow.
- const bool is_blit{old_params.pixel_format == new_params.pixel_format};
-
switch (new_params.target) {
case SurfaceTarget::Texture2D:
- if (is_blit) {
- BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle);
- } else {
- CopySurface(old_surface, new_surface, copy_pbo.handle);
- }
+ CopySurface(old_surface, new_surface, copy_pbo.handle);
break;
case SurfaceTarget::Texture3D:
AccurateCopySurface(old_surface, new_surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 0c4524d5c..d235bfcd4 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -201,14 +201,53 @@ private:
}
};
+template <typename T>
+class ShaderScopedScope {
+public:
+ explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr)
+ : writer(writer), end_expr(std::move(end_expr)) {
+
+ if (begin_expr.empty()) {
+ writer.AddLine('{');
+ } else {
+ writer.AddExpression(begin_expr);
+ writer.AddLine(" {");
+ }
+ ++writer.scope;
+ }
+
+ ShaderScopedScope(const ShaderScopedScope&) = delete;
+
+ ~ShaderScopedScope() {
+ --writer.scope;
+ if (end_expr.empty()) {
+ writer.AddLine('}');
+ } else {
+ writer.AddExpression("} ");
+ writer.AddExpression(end_expr);
+ writer.AddLine(';');
+ }
+ }
+
+ ShaderScopedScope& operator=(const ShaderScopedScope&) = delete;
+
+private:
+ T& writer;
+ std::string end_expr;
+};
+
class ShaderWriter {
public:
- void AddLine(std::string_view text) {
+ void AddExpression(std::string_view text) {
DEBUG_ASSERT(scope >= 0);
if (!text.empty()) {
AppendIndentation();
}
shader_source += text;
+ }
+
+ void AddLine(std::string_view text) {
+ AddExpression(text);
AddNewLine();
}
@@ -228,6 +267,11 @@ public:
return std::move(shader_source);
}
+ ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {},
+ std::string end_expr = {}) {
+ return ShaderScopedScope(*this, begin_expr, end_expr);
+ }
+
int scope = 0;
private:
@@ -311,7 +355,7 @@ public:
// Default - do nothing
return value;
default:
- UNIMPLEMENTED_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
+ UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
}
}
@@ -525,6 +569,7 @@ public:
((header.vtg.clip_distances >> index) & 1) == 0,
"Shader is setting gl_ClipDistance{} without enabling it in the header", index);
+ clip_distances[index] = true;
fixed_pipeline_output_attributes_used.insert(attribute);
shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';');
break;
@@ -602,6 +647,11 @@ public:
return used_samplers;
}
+ /// Returns an array of the used clip distances.
+ const std::array<bool, Maxwell::NumClipDistances>& GetClipDistances() const {
+ return clip_distances;
+ }
+
/// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
/// necessary.
std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
@@ -810,14 +860,12 @@ private:
}
if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
+
// This avoids optimizations of constant propagation and keeps the code as the original
// Sadly using the precise keyword causes "linking" errors on fragment shaders.
shader.AddLine("precise float tmp = " + src + ';');
shader.AddLine(dest + " = tmp;");
- --shader.scope;
- shader.AddLine('}');
} else {
shader.AddLine(dest + " = " + src + ';');
}
@@ -975,6 +1023,7 @@ private:
const std::string& suffix;
const Tegra::Shader::Header& header;
std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used;
+ std::array<bool, Maxwell::NumClipDistances> clip_distances{};
u64 local_memory_size;
};
@@ -997,7 +1046,8 @@ public:
/// Returns entries in the shader that are useful for external functions
ShaderEntries GetEntries() const {
- return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), shader_length};
+ return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), regs.GetClipDistances(),
+ shader_length};
}
private:
@@ -1293,15 +1343,7 @@ private:
regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
}
- void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
- const std::string& texture) {
- // Add an extra scope and declare the texture coords inside to prevent
- // overwriting them in case they are used as outputs of the texs instruction.
- shader.AddLine('{');
- ++shader.scope;
- shader.AddLine(coord);
- shader.AddLine("vec4 texture_tmp = " + texture + ';');
-
+ void WriteTexsInstruction(const Instruction& instr, const std::string& texture) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
@@ -1313,19 +1355,17 @@ private:
if (written_components < 2) {
// Write the first two swizzle components to gpr0 and gpr0+1
- regs.SetRegisterToFloat(instr.gpr0, component, "texture_tmp", 1, 4, false,
+ regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false,
written_components % 2);
} else {
ASSERT(instr.texs.HasTwoDestinations());
// Write the rest of the swizzle components to gpr28 and gpr28+1
- regs.SetRegisterToFloat(instr.gpr28, component, "texture_tmp", 1, 4, false,
+ regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false,
written_components % 2);
}
++written_components;
}
- --shader.scope;
- shader.AddLine('}');
}
static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) {
@@ -1348,12 +1388,10 @@ private:
* top.
*/
void EmitPushToFlowStack(u32 target) {
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
+
shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;");
shader.AddLine("flow_stack_top++;");
- --shader.scope;
- shader.AddLine('}');
}
/*
@@ -1361,13 +1399,11 @@ private:
* popped address and decrementing the stack top.
*/
void EmitPopFromFlowStack() {
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
+
shader.AddLine("flow_stack_top--;");
shader.AddLine("jmp_to = flow_stack[flow_stack_top];");
shader.AddLine("break;");
- --shader.scope;
- shader.AddLine('}');
}
/// Writes the output values from a fragment shader to the corresponding GLSL output variables.
@@ -2279,8 +2315,7 @@ private:
UNIMPLEMENTED_IF(instr.conversion.selector);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
-
- std::string op_a{};
+ std::string op_a;
if (instr.is_b_gpr) {
op_a =
@@ -2436,10 +2471,7 @@ private:
case OpCode::Id::LD_C: {
UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
- // Add an extra scope and declare the index register inside to prevent
- // overwriting it in case it is used as an output of the LD instruction.
- shader.AddLine("{");
- ++shader.scope;
+ const auto scope = shader.Scope();
shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
" / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
@@ -2465,19 +2497,13 @@ private:
UNIMPLEMENTED_MSG("Unhandled type: {}",
static_cast<unsigned>(instr.ld_c.type.Value()));
}
-
- --shader.scope;
- shader.AddLine("}");
break;
}
case OpCode::Id::LD_L: {
UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
static_cast<unsigned>(instr.ld_l.unknown.Value()));
- // Add an extra scope and declare the index register inside to prevent
- // overwriting it in case it is used as an output of the LD instruction.
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
std::to_string(instr.smem_imm.Value()) + ')';
@@ -2494,9 +2520,6 @@ private:
UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
static_cast<unsigned>(instr.ldst_sl.type.Value()));
}
-
- --shader.scope;
- shader.AddLine('}');
break;
}
case OpCode::Id::ST_A: {
@@ -2531,10 +2554,7 @@ private:
UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
static_cast<unsigned>(instr.st_l.unknown.Value()));
- // Add an extra scope and declare the index register inside to prevent
- // overwriting it in case it is used as an output of the LD instruction.
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
std::to_string(instr.smem_imm.Value()) + ')';
@@ -2549,14 +2569,10 @@ private:
UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
static_cast<unsigned>(instr.ldst_sl.type.Value()));
}
-
- --shader.scope;
- shader.AddLine('}');
break;
}
case OpCode::Id::TEX: {
Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
- std::string coord;
const bool is_array = instr.tex.array != 0;
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
@@ -2589,21 +2605,23 @@ private:
bool depth_compare_extra = false;
+ const auto scope = shader.Scope();
+
switch (num_coordinates) {
case 1: {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
if (is_array) {
if (depth_compare) {
- coord = "vec3 coords = vec3(" + x + ", " + depth_value + ", " +
- array_elem + ");";
+ shader.AddLine("vec3 coords = vec3(" + x + ", " + depth_value + ", " +
+ array_elem + ");");
} else {
- coord = "vec2 coords = vec2(" + x + ", " + array_elem + ");";
+ shader.AddLine("vec2 coords = vec2(" + x + ", " + array_elem + ");");
}
} else {
if (depth_compare) {
- coord = "vec2 coords = vec2(" + x + ", " + depth_value + ");";
+ shader.AddLine("vec2 coords = vec2(" + x + ", " + depth_value + ");");
} else {
- coord = "float coords = " + x + ';';
+ shader.AddLine("float coords = " + x + ';');
}
}
break;
@@ -2614,17 +2632,18 @@ private:
regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
if (is_array) {
if (depth_compare) {
- coord = "vec4 coords = vec4(" + x + ", " + y + ", " + depth_value +
- ", " + array_elem + ");";
+ shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " +
+ depth_value + ", " + array_elem + ");");
} else {
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + array_elem + ");";
+ shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " +
+ array_elem + ");");
}
} else {
if (depth_compare) {
- coord =
- "vec3 coords = vec3(" + x + ", " + y + ", " + depth_value + ");";
+ shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " +
+ depth_value + ");");
} else {
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
}
}
break;
@@ -2637,14 +2656,14 @@ private:
regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2);
if (is_array) {
depth_compare_extra = depth_compare;
- coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
- array_elem + ");";
+ shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
+ array_elem + ");");
} else {
if (depth_compare) {
- coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
- depth_value + ");";
+ shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
+ depth_value + ");");
} else {
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
+ shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");");
}
}
break;
@@ -2656,7 +2675,7 @@ private:
// Fallback to interpreting as a 2D texture for now
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
texture_type = Tegra::Shader::TextureType::Texture2D;
}
@@ -2665,79 +2684,61 @@ private:
// Add an extra scope and declare the texture coords inside to prevent
// overwriting them in case they are used as outputs of the texs instruction.
- shader.AddLine('{');
- ++shader.scope;
- shader.AddLine(coord);
- std::string texture;
-
- switch (instr.tex.GetTextureProcessMode()) {
- case Tegra::Shader::TextureProcessMode::None: {
- if (!depth_compare_extra) {
- texture = "texture(" + sampler + ", coords)";
- } else {
- texture = "texture(" + sampler + ", coords, " + depth_value + ')';
- }
- break;
- }
- case Tegra::Shader::TextureProcessMode::LZ: {
- if (!depth_compare_extra) {
- texture = "textureLod(" + sampler + ", coords, 0.0)";
- } else {
- texture = "texture(" + sampler + ", coords, " + depth_value + ')';
- }
- break;
- }
- case Tegra::Shader::TextureProcessMode::LB:
- case Tegra::Shader::TextureProcessMode::LBA: {
- // TODO: Figure if A suffix changes the equation at all.
- if (!depth_compare_extra) {
- texture = "texture(" + sampler + ", coords, " + lod_value + ')';
- } else {
- texture = "texture(" + sampler + ", coords, " + depth_value + ')';
- LOG_WARNING(HW_GPU,
- "OpenGL Limitation: can't set bias value along depth compare");
- }
- break;
- }
- case Tegra::Shader::TextureProcessMode::LL:
- case Tegra::Shader::TextureProcessMode::LLA: {
- // TODO: Figure if A suffix changes the equation at all.
- if (!depth_compare_extra) {
- texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
- } else {
- texture = "texture(" + sampler + ", coords, " + depth_value + ')';
- LOG_WARNING(HW_GPU,
- "OpenGL Limitation: can't set lod value along depth compare");
- }
- break;
- }
- default: {
- if (!depth_compare_extra) {
- texture = "texture(" + sampler + ", coords)";
- } else {
- texture = "texture(" + sampler + ", coords, " + depth_value + ')';
+ const std::string texture = [&]() {
+ switch (instr.tex.GetTextureProcessMode()) {
+ case Tegra::Shader::TextureProcessMode::None:
+ if (depth_compare_extra) {
+ return "texture(" + sampler + ", coords, " + depth_value + ')';
+ }
+ return "texture(" + sampler + ", coords)";
+ case Tegra::Shader::TextureProcessMode::LZ:
+ if (depth_compare_extra) {
+ return "texture(" + sampler + ", coords, " + depth_value + ')';
+ }
+ return "textureLod(" + sampler + ", coords, 0.0)";
+ case Tegra::Shader::TextureProcessMode::LB:
+ case Tegra::Shader::TextureProcessMode::LBA:
+ // TODO: Figure if A suffix changes the equation at all.
+ if (depth_compare_extra) {
+ LOG_WARNING(
+ HW_GPU,
+ "OpenGL Limitation: can't set bias value along depth compare");
+ return "texture(" + sampler + ", coords, " + depth_value + ')';
+ }
+ return "texture(" + sampler + ", coords, " + lod_value + ')';
+ case Tegra::Shader::TextureProcessMode::LL:
+ case Tegra::Shader::TextureProcessMode::LLA:
+ // TODO: Figure if A suffix changes the equation at all.
+ if (depth_compare_extra) {
+ LOG_WARNING(
+ HW_GPU,
+ "OpenGL Limitation: can't set lod value along depth compare");
+ return "texture(" + sampler + ", coords, " + depth_value + ')';
+ }
+ return "textureLod(" + sampler + ", coords, " + lod_value + ')';
+ default:
+ UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
+ static_cast<u32>(instr.tex.GetTextureProcessMode()));
+ if (depth_compare_extra) {
+ return "texture(" + sampler + ", coords, " + depth_value + ')';
+ }
+ return "texture(" + sampler + ", coords)";
}
- UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
- static_cast<u32>(instr.tex.GetTextureProcessMode()));
- }
- }
- if (!depth_compare) {
- shader.AddLine("vec4 texture_tmp = " + texture + ';');
+ }();
+
+ if (depth_compare) {
+ regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
+ } else {
std::size_t dest_elem{};
for (std::size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
- regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
- dest_elem);
+ regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem);
++dest_elem;
}
- } else {
- regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
}
- --shader.scope;
- shader.AddLine('}');
break;
}
case OpCode::Id::TEXS: {
@@ -2747,26 +2748,28 @@ private:
UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
+ const auto scope = shader.Scope();
+
const bool depth_compare =
instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
u32 num_coordinates = TextureCoordinates(texture_type);
const auto process_mode = instr.texs.GetTextureProcessMode();
- std::string lod_value;
- std::string coord;
u32 lod_offset = 0;
if (process_mode == Tegra::Shader::TextureProcessMode::LL) {
if (num_coordinates > 2) {
- lod_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
+ shader.AddLine("float lod_value = " +
+ regs.GetRegisterAsFloat(instr.gpr20.Value() + 1) + ';');
lod_offset = 2;
} else {
- lod_value = regs.GetRegisterAsFloat(instr.gpr20);
+ shader.AddLine("float lod_value = " + regs.GetRegisterAsFloat(instr.gpr20) +
+ ';');
lod_offset = 1;
}
}
switch (num_coordinates) {
case 1: {
- coord = "float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';';
+ shader.AddLine("float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';');
break;
}
case 2: {
@@ -2776,13 +2779,14 @@ private:
const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
- coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
- ");";
+ shader.AddLine("vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
+ index + ");");
} else {
const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
+ shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + index +
+ ");");
}
} else {
if (lod_offset != 0) {
@@ -2792,12 +2796,13 @@ private:
regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string z =
regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset);
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
+ shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z +
+ ");");
} else {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
const std::string y =
regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
}
} else {
if (depth_compare) {
@@ -2805,11 +2810,12 @@ private:
const std::string y =
regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
+ shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z +
+ ");");
} else {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
}
}
}
@@ -2819,7 +2825,7 @@ private:
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
+ shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");");
break;
}
default:
@@ -2829,42 +2835,37 @@ private:
// Fallback to interpreting as a 2D texture for now
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
texture_type = Tegra::Shader::TextureType::Texture2D;
is_array = false;
}
const std::string sampler =
GetSampler(instr.sampler, texture_type, is_array, depth_compare);
- std::string texture;
- switch (process_mode) {
- case Tegra::Shader::TextureProcessMode::None: {
- texture = "texture(" + sampler + ", coords)";
- break;
- }
- case Tegra::Shader::TextureProcessMode::LZ: {
- if (depth_compare && is_array) {
- texture = "texture(" + sampler + ", coords)";
- } else {
- texture = "textureLod(" + sampler + ", coords, 0.0)";
+
+ std::string texture = [&]() {
+ switch (process_mode) {
+ case Tegra::Shader::TextureProcessMode::None:
+ return "texture(" + sampler + ", coords)";
+ case Tegra::Shader::TextureProcessMode::LZ:
+ if (depth_compare && is_array) {
+ return "texture(" + sampler + ", coords)";
+ } else {
+ return "textureLod(" + sampler + ", coords, 0.0)";
+ }
+ break;
+ case Tegra::Shader::TextureProcessMode::LL:
+ return "textureLod(" + sampler + ", coords, lod_value)";
+ default:
+ UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
+ static_cast<u32>(instr.texs.GetTextureProcessMode()));
+ return "texture(" + sampler + ", coords)";
}
- break;
- }
- case Tegra::Shader::TextureProcessMode::LL: {
- texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
- break;
- }
- default: {
- texture = "texture(" + sampler + ", coords)";
- UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
- static_cast<u32>(instr.texs.GetTextureProcessMode()));
- }
- }
- if (!depth_compare) {
- WriteTexsInstruction(instr, coord, texture);
- } else {
- WriteTexsInstruction(instr, coord, "vec4(" + texture + ')');
+ }();
+ if (depth_compare) {
+ texture = "vec4(" + texture + ')';
}
+ WriteTexsInstruction(instr, texture);
break;
}
case OpCode::Id::TLDS: {
@@ -2883,15 +2884,12 @@ private:
u32 extra_op_offset = 0;
- // Scope to avoid variable name overlaps.
- shader.AddLine('{');
- ++shader.scope;
- std::string coords;
+ ShaderScopedScope scope = shader.Scope();
switch (texture_type) {
case Tegra::Shader::TextureType::Texture1D: {
const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
- coords = "float coords = " + x + ';';
+ shader.AddLine("float coords = " + x + ';');
break;
}
case Tegra::Shader::TextureType::Texture2D: {
@@ -2900,7 +2898,7 @@ private:
const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
// shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");");
- coords = "ivec2 coords = ivec2(" + x + ", " + y + ");";
+ shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");");
extra_op_offset = 1;
break;
}
@@ -2909,35 +2907,29 @@ private:
}
const std::string sampler =
GetSampler(instr.sampler, texture_type, is_array, false);
- std::string texture = "texelFetch(" + sampler + ", coords, 0)";
- switch (instr.tlds.GetTextureProcessMode()) {
- case Tegra::Shader::TextureProcessMode::LZ: {
- texture = "texelFetch(" + sampler + ", coords, 0)";
- break;
- }
- case Tegra::Shader::TextureProcessMode::LL: {
- shader.AddLine(
- "float lod = " +
- regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';');
- texture = "texelFetch(" + sampler + ", coords, lod)";
- break;
- }
- default: {
- texture = "texelFetch(" + sampler + ", coords, 0)";
- UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
- static_cast<u32>(instr.tlds.GetTextureProcessMode()));
- }
- }
- WriteTexsInstruction(instr, coords, texture);
- --shader.scope;
- shader.AddLine('}');
+ const std::string texture = [&]() {
+ switch (instr.tlds.GetTextureProcessMode()) {
+ case Tegra::Shader::TextureProcessMode::LZ:
+ return "texelFetch(" + sampler + ", coords, 0)";
+ case Tegra::Shader::TextureProcessMode::LL:
+ shader.AddLine(
+ "float lod = " +
+ regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';');
+ return "texelFetch(" + sampler + ", coords, lod)";
+ default:
+ UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
+ static_cast<u32>(instr.tlds.GetTextureProcessMode()));
+ return "texelFetch(" + sampler + ", coords, 0)";
+ }
+ }();
+
+ WriteTexsInstruction(instr, texture);
break;
}
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D);
ASSERT(instr.tld4.array == 0);
- std::string coord;
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
@@ -2954,10 +2946,7 @@ private:
if (depth_compare)
num_coordinates += 1;
- // Add an extra scope and declare the texture coords inside to prevent
- // overwriting them in case they are used as outputs of the texs instruction.
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
switch (num_coordinates) {
case 2: {
@@ -2988,23 +2977,19 @@ private:
const std::string texture = "textureGather(" + sampler + ", coords, " +
std::to_string(instr.tld4.component) + ')';
- if (!depth_compare) {
- shader.AddLine("vec4 texture_tmp = " + texture + ';');
+ if (depth_compare) {
+ regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
+ } else {
std::size_t dest_elem{};
for (std::size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
- regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
- dest_elem);
+ regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem);
++dest_elem;
}
- } else {
- regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
}
- --shader.scope;
- shader.AddLine('}');
break;
}
case OpCode::Id::TLD4S: {
@@ -3015,10 +3000,7 @@ private:
instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- // Scope to avoid variable name overlaps.
- shader.AddLine('{');
- ++shader.scope;
- std::string coords;
+ const auto scope = shader.Scope();
const bool depth_compare =
instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
@@ -3027,33 +3009,29 @@ private:
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
const std::string sampler = GetSampler(
instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare);
- if (!depth_compare) {
- coords = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
- } else {
+ if (depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- coords = "vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");";
- }
- const std::string texture = "textureGather(" + sampler + ", coords, " +
- std::to_string(instr.tld4s.component) + ')';
-
- if (!depth_compare) {
- WriteTexsInstruction(instr, coords, texture);
+ shader.AddLine("vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");");
} else {
- WriteTexsInstruction(instr, coords, "vec4(" + texture + ')');
+ shader.AddLine("vec2 coords = vec2(" + op_a + ", " + op_b + ");");
}
- --shader.scope;
- shader.AddLine('}');
+ std::string texture = "textureGather(" + sampler + ", coords, " +
+ std::to_string(instr.tld4s.component) + ')';
+ if (depth_compare) {
+ texture = "vec4(" + texture + ')';
+ }
+ WriteTexsInstruction(instr, texture);
break;
}
case OpCode::Id::TXQ: {
UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
- ++shader.scope;
- shader.AddLine('{');
- // TODO: the new commits on the texture refactor, change the way samplers work.
+ const auto scope = shader.Scope();
+
+ // TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const std::string sampler =
@@ -3064,7 +3042,8 @@ private:
regs.GetRegisterAsInteger(instr.gpr8) + ')';
const std::string mip_level = "textureQueryLevels(" + sampler + ')';
shader.AddLine("ivec2 sizes = " + texture + ';');
- regs.SetRegisterToInteger(instr.gpr0, true, 0, "sizes.x", 1, 1);
+
+ regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1);
regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1);
regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1);
regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1);
@@ -3075,8 +3054,6 @@ private:
static_cast<u32>(instr.txq.query_type.Value()));
}
}
- --shader.scope;
- shader.AddLine('}');
break;
}
case OpCode::Id::TMML: {
@@ -3091,17 +3068,18 @@ private:
const std::string sampler =
GetSampler(instr.sampler, texture_type, is_array, false);
- // TODO: add coordinates for different samplers once other texture types are
+ const auto scope = shader.Scope();
+
+ // TODO: Add coordinates for different samplers once other texture types are
// implemented.
- std::string coord;
switch (texture_type) {
case Tegra::Shader::TextureType::Texture1D: {
- coord = "float coords = " + x + ';';
+ shader.AddLine("float coords = " + x + ';');
break;
}
case Tegra::Shader::TextureType::Texture2D: {
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
break;
}
default:
@@ -3109,22 +3087,15 @@ private:
// Fallback to interpreting as a 2D texture for now
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
texture_type = Tegra::Shader::TextureType::Texture2D;
}
- // Add an extra scope and declare the texture coords inside to prevent
- // overwriting them in case they are used as outputs of the texs instruction.
- shader.AddLine('{');
- ++shader.scope;
- shader.AddLine(coord);
+
const std::string texture = "textureQueryLod(" + sampler + ", coords)";
- const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);";
- shader.AddLine(tmp);
+ shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);");
regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1);
regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1);
- --shader.scope;
- shader.AddLine('}');
break;
}
default: {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index b425d98ae..4fa6d7612 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -163,6 +163,7 @@ private:
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffer_entries;
std::vector<SamplerEntry> texture_samplers;
+ std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances;
std::size_t shader_length;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index b757f5f44..4970aafed 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -60,6 +60,17 @@ public:
}
void ApplyTo(OpenGLState& state) {
+ UpdatePipeline();
+ state.draw.shader_program = 0;
+ state.draw.program_pipeline = pipeline.handle;
+ state.geometry_shaders.enabled = (gs != 0);
+ }
+
+private:
+ void UpdatePipeline() {
+ // Avoid updating the pipeline when values have no changed
+ if (old_vs == vs && old_fs == fs && old_gs == gs)
+ return;
// Workaround for AMD bug
glUseProgramStages(pipeline.handle,
GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
@@ -68,14 +79,16 @@ public:
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs);
glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs);
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs);
- state.draw.shader_program = 0;
- state.draw.program_pipeline = pipeline.handle;
- state.geometry_shaders.enabled = (gs != 0);
+
+ // Update the old values
+ old_vs = vs;
+ old_fs = fs;
+ old_gs = gs;
}
-private:
OGLPipeline pipeline;
GLuint vs{}, fs{}, gs{};
+ GLuint old_vs{}, old_fs{}, old_gs{};
};
} // namespace OpenGL::GLShader