aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp141
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h19
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp249
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp19
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp89
-rw-r--r--src/video_core/renderer_opengl/gl_state.h29
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h22
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp15
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h3
10 files changed, 408 insertions, 181 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 01d89f47d..4dd08bccb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -331,7 +331,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupDrawConstBuffers(stage_enum, shader);
SetupDrawGlobalMemory(stage_enum, shader);
- const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
+ const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)};
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
@@ -489,9 +489,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
texture_cache.MarkColorBufferInUse(*single_color_target);
- // Workaround for and issue in nvidia drivers
- // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
- state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
}
fbkey.is_single_buffer = true;
@@ -512,11 +509,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
// Assume that a surface will be written to if it is used as a framebuffer, even
// if the shader doesn't actually write to it.
texture_cache.MarkColorBufferInUse(index);
- // Enable sRGB only for supported formats
- // Workaround for and issue in nvidia drivers
- // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
- state.framebuffer_srgb.enabled |=
- color_surface->GetSurfaceParams().srgb_conversion;
}
fbkey.color_attachments[index] =
@@ -801,7 +793,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
}
auto kernel = shader_cache.GetComputeKernel(code_addr);
- const auto [program, next_bindings] = kernel->GetProgramHandle({});
+ ProgramVariant variant;
+ variant.texture_buffer_usage = SetupComputeTextures(kernel);
+ SetupComputeImages(kernel);
+
+ const auto [program, next_bindings] = kernel->GetProgramHandle(variant);
state.draw.shader_program = program;
state.draw.program_pipeline = 0;
@@ -816,13 +812,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
SetupComputeConstBuffers(kernel);
SetupComputeGlobalMemory(kernel);
- // TODO(Rodrigo): Bind images and samplers
-
buffer_cache.Unmap();
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
+ state.ApplyTextures();
+ state.ApplyImages();
state.ApplyShaderProgram();
state.ApplyProgramPipeline();
@@ -902,6 +898,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
}
screen_info.display_texture = surface->GetTexture();
+ screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;
return true;
}
@@ -922,7 +919,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
- const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value();
+ const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
Tegra::Engines::ConstBufferInfo buffer;
buffer.address = config.Address();
buffer.size = config.size;
@@ -981,53 +978,125 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr
bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
}
-TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
- BaseBindings base_bindings) {
+TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage,
+ const Shader& shader,
+ BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& entries = shader->GetShaderEntries().samplers;
- ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
+ ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures),
"Exceeded the number of active textures.");
TextureBufferUsage texture_buffer_usage{0};
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
- Tegra::Texture::FullTextureInfo texture;
- if (entry.IsBindless()) {
+ const auto texture = [&]() {
+ if (!entry.IsBindless()) {
+ return maxwell3d.GetStageTexture(stage, entry.GetOffset());
+ }
const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle;
tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second);
- texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
- } else {
- texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
+ return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
+ }();
+
+ if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) {
+ texture_buffer_usage.set(bindpoint);
}
- const u32 current_bindpoint = base_bindings.sampler + bindpoint;
+ }
- auto& unit{state.texture_units[current_bindpoint]};
- unit.sampler = sampler_cache.GetSampler(texture.tsc);
+ return texture_buffer_usage;
+}
- if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) {
- if (view->GetSurfaceParams().IsBuffer()) {
- // Record that this texture is a texture buffer.
- texture_buffer_usage.set(bindpoint);
- } else {
- // Apply swizzle to textures that are not buffers.
- view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
- texture.tic.w_source);
+TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
+ MICROPROFILE_SCOPE(OpenGL_Texture);
+ const auto& compute = system.GPU().KeplerCompute();
+ const auto& entries = kernel->GetShaderEntries().samplers;
+
+ ASSERT_MSG(entries.size() <= std::size(state.textures),
+ "Exceeded the number of active textures.");
+
+ TextureBufferUsage texture_buffer_usage{0};
+
+ for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+ const auto& entry = entries[bindpoint];
+ const auto texture = [&]() {
+ if (!entry.IsBindless()) {
+ return compute.GetTexture(entry.GetOffset());
}
- state.texture_units[current_bindpoint].texture = view->GetTexture();
- } else {
- // Can occur when texture addr is null or its memory is unmapped/invalid
- unit.texture = 0;
+ const auto cbuf = entry.GetBindlessCBuf();
+ Tegra::Texture::TextureHandle tex_handle;
+ tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
+ return compute.GetTextureInfo(tex_handle, entry.GetOffset());
+ }();
+
+ if (SetupTexture(bindpoint, texture, entry)) {
+ texture_buffer_usage.set(bindpoint);
}
}
return texture_buffer_usage;
}
+// Records the sampler and texture for one binding in the tracked GL state.
+// Returns true only when the resolved surface is a texture buffer.
+bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
+                                    const GLShader::SamplerEntry& entry) {
+    const auto& tic = texture.tic;
+    state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
+
+    const auto view = texture_cache.GetTextureSurface(tic, entry);
+    if (!view) {
+        // No surface was resolved; this can occur when the texture address is null or its
+        // memory is unmapped/invalid. Leave the unit without a texture.
+        state.textures[binding] = 0;
+        return false;
+    }
+    state.textures[binding] = view->GetTexture();
+
+    const bool is_buffer = view->GetSurfaceParams().IsBuffer();
+    if (!is_buffer) {
+        // The swizzle is applied only to textures that are not buffers.
+        view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+    }
+    return is_buffer;
+}
+
+// Resolves the TIC of every image entry declared by the compute kernel and hands it to
+// SetupImage, using the entry's position in the list as the image binding.
+void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
+    const auto& compute = system.GPU().KeplerCompute();
+    const auto& entries = shader->GetShaderEntries().images;
+
+    // SetupImage indexes state.images with the bindpoint, so check the bound up front in the
+    // same way the sampler setup does for state.textures.
+    ASSERT_MSG(entries.size() <= std::size(state.images),
+               "Exceeded the number of active images.");
+
+    for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+        const auto& entry = entries[bindpoint];
+        const auto tic = [&]() {
+            if (!entry.IsBindless()) {
+                return compute.GetTexture(entry.GetOffset()).tic;
+            }
+            // Bindless: the texture handle is read from the constant buffer location reported
+            // by the entry.
+            const auto cbuf = entry.GetBindlessCBuf();
+            Tegra::Texture::TextureHandle tex_handle;
+            tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
+            return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
+        }();
+        SetupImage(bindpoint, tic, entry);
+    }
+}
+
+// Resolves the image surface for a TIC entry and records its GL texture in the tracked image
+// binding. A binding without a resolvable surface is set to 0.
+void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
+                                  const GLShader::ImageEntry& entry) {
+    auto& slot = state.images[binding];
+
+    const auto view = texture_cache.GetImageSurface(tic, entry);
+    if (!view) {
+        slot = 0;
+        return;
+    }
+
+    const bool needs_swizzle = !tic.IsBuffer();
+    if (needs_swizzle) {
+        view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+    }
+    // Images the shader writes to are flagged as modified with the cache's current tick.
+    if (entry.IsWritten()) {
+        view->MarkAsModified(texture_cache.Tick());
+    }
+    slot = view->GetTexture();
+}
+
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
const auto& regs = system.GPU().Maxwell3D().regs;
const bool geometry_shaders_enabled =
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9d20a4fbf..eada752e0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -32,6 +32,7 @@
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
+#include "video_core/textures/texture.h"
namespace Core {
class System;
@@ -137,8 +138,22 @@ private:
/// Configures the current textures to use for the draw command. Returns shaders texture buffer
/// usage.
- TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader, BaseBindings base_bindings);
+ TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader, BaseBindings base_bindings);
+
+ /// Configures the textures used in a compute shader. Returns texture buffer usage.
+ TextureBufferUsage SetupComputeTextures(const Shader& kernel);
+
+ /// Configures a texture. Returns true when the texture is a texture buffer.
+ bool SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
+ const GLShader::SamplerEntry& entry);
+
+ /// Configures images in a compute shader.
+ void SetupComputeImages(const Shader& shader);
+
+ /// Configures an image.
+ void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
+ const GLShader::ImageEntry& entry);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 909ccb82c..0dbc4c02f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,7 +214,8 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
std::string source = "#version 430 core\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"#extension GL_NV_gpu_shader5 : enable\n"
- "#extension GL_NV_shader_thread_group : enable\n";
+ "#extension GL_NV_shader_thread_group : enable\n"
+ "#extension GL_NV_shader_thread_shuffle : enable\n";
if (entries.shader_viewport_layer_array) {
source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 4a8c7edc9..76439e7ab 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -325,6 +325,7 @@ public:
DeclareRegisters();
DeclarePredicates();
DeclareLocalMemory();
+ DeclareSharedMemory();
DeclareInternalFlags();
DeclareInputAttributes();
DeclareOutputAttributes();
@@ -389,11 +390,10 @@ public:
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
- for (const auto& image : ir.GetImages()) {
+ for (const auto& [offset, image] : ir.GetImages()) {
entries.images.emplace_back(image);
}
- for (const auto& gmem_pair : ir.GetGlobalMemory()) {
- const auto& [base, usage] = gmem_pair;
+ for (const auto& [base, usage] : ir.GetGlobalMemory()) {
entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
usage.is_read, usage.is_written);
}
@@ -500,6 +500,13 @@ private:
code.AddNewLine();
}
+ // Emits the declaration of the shared memory array. Only compute programs get one.
+ void DeclareSharedMemory() {
+     if (stage == ProgramType::Compute) {
+         code.AddLine("shared uint {}[];", GetSharedMemory());
+     }
+ }
+
void DeclareInternalFlags() {
for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
const auto flag_code = static_cast<InternalFlag>(flag);
@@ -706,8 +713,8 @@ private:
void DeclareImages() {
const auto& images{ir.GetImages()};
- for (const auto& image : images) {
- const std::string image_type = [&]() {
+ for (const auto& [offset, image] : images) {
+ const char* image_type = [&] {
switch (image.GetType()) {
case Tegra::Shader::ImageType::Texture1D:
return "image1D";
@@ -726,9 +733,33 @@ private:
return "image1D";
}
}();
- code.AddLine("layout (binding = IMAGE_BINDING_{}) coherent volatile writeonly uniform "
+
+ const auto [type_prefix, format] = [&]() -> std::pair<const char*, const char*> {
+ if (!image.IsSizeKnown()) {
+ return {"", ""};
+ }
+ switch (image.GetSize()) {
+ case Tegra::Shader::ImageAtomicSize::U32:
+ return {"u", "r32ui, "};
+ case Tegra::Shader::ImageAtomicSize::S32:
+ return {"i", "r32i, "};
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented atomic size={}",
+ static_cast<u32>(image.GetSize()));
+ return {"", ""};
+ }
+ }();
+
+ std::string qualifier = "coherent volatile";
+ if (image.IsRead() && !image.IsWritten()) {
+ qualifier += " readonly";
+ } else if (image.IsWritten() && !image.IsRead()) {
+ qualifier += " writeonly";
+ }
+
+ code.AddLine("layout (binding = IMAGE_BINDING_{}) {} uniform "
"{} {};",
- image.GetIndex(), image_type, GetImage(image));
+ image.GetIndex(), qualifier, image_type, GetImage(image));
}
if (!images.empty()) {
code.AddNewLine();
@@ -858,6 +889,12 @@ private:
Type::Uint};
}
+ if (const auto smem = std::get_if<SmemNode>(&*node)) {
+ return {
+ fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
+ Type::Uint};
+ }
+
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
}
@@ -1174,6 +1211,74 @@ private:
return expr;
}
+ // Builds an integer coordinate constructor ("int(...)" up to "ivec4(...)") from all operands
+ // of the operation, each visited as a signed integer.
+ std::string BuildIntegerCoordinates(Operation operation) {
+     constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
+     const std::size_t count{operation.GetOperandsCount()};
+
+     std::string result = constructors.at(count - 1);
+     for (std::size_t index = 0; index < count; ++index) {
+         if (index != 0) {
+             result += ", ";
+         }
+         result += VisitOperand(operation, index).AsInt();
+     }
+     result += ')';
+     return result;
+ }
+
+ // Builds the value constructor expression (for example "vec4(a, b, c, d)") from the values
+ // stored in the operation's MetaImage. The constructor family and the type the values are
+ // read as follow the image's size when it is known; otherwise floats are used.
+ std::string BuildImageValues(Operation operation) {
+     const auto meta{std::get<MetaImage>(operation.GetMeta())};
+     const auto [constructors, type] = [&]() -> std::pair<std::array<const char*, 4>, Type> {
+         constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"};
+         if (!meta.image.IsSizeKnown()) {
+             return {float_constructors, Type::Float};
+         }
+         switch (meta.image.GetSize()) {
+         case Tegra::Shader::ImageAtomicSize::U32:
+             return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint};
+         case Tegra::Shader::ImageAtomicSize::S32:
+             // Signed constructors take signed operands, matching what AtomicImage does for
+             // the same image size.
+             return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Int};
+         default:
+             UNIMPLEMENTED_MSG("Unimplemented image size={}",
+                               static_cast<u32>(meta.image.GetSize()));
+             return {float_constructors, Type::Float};
+         }
+     }();
+
+     const std::size_t values_count{meta.values.size()};
+     std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
+     for (std::size_t i = 0; i < values_count; ++i) {
+         expr += Visit(meta.values.at(i)).As(type);
+         if (i + 1 < values_count) {
+             expr += ", ";
+         }
+     }
+     expr += ')';
+     return expr;
+ }
+
+ // Emits an image atomic call "opname(image, coords, value)". The operation must carry
+ // exactly one value and an image of known size; the size selects whether the value and the
+ // resulting expression are unsigned or signed.
+ Expression AtomicImage(Operation operation, const char* opname) {
+     const auto meta{std::get<MetaImage>(operation.GetMeta())};
+     ASSERT(meta.values.size() == 1);
+     ASSERT(meta.image.IsSizeKnown());
+
+     const auto type = [&]() {
+         switch (const auto size = meta.image.GetSize()) {
+         case Tegra::Shader::ImageAtomicSize::U32:
+             return Type::Uint;
+         case Tegra::Shader::ImageAtomicSize::S32:
+             return Type::Int;
+         default:
+             UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast<u32>(size));
+             return Type::Uint;
+         }
+     }();
+
+     // Coordinates come from the operation's operands; the atomic operand comes from the meta.
+     return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image),
+                         BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)),
+             type};
+ }
+
Expression Assign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
@@ -1199,6 +1304,11 @@ private:
target = {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};
+ } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
+ ASSERT(stage == ProgramType::Compute);
+ target = {
+ fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
+ Type::Uint};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
const std::string real = Visit(gmem->GetRealAddress()).AsUint();
const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
@@ -1692,36 +1802,37 @@ private:
}
Expression ImageStore(Operation operation) {
- constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
+ BuildIntegerCoordinates(operation), BuildImageValues(operation));
+ return {};
+ }
- std::string expr = "imageStore(";
- expr += GetImage(meta.image);
- expr += ", ";
+ Expression AtomicImageAdd(Operation operation) {
+ return AtomicImage(operation, "imageAtomicAdd");
+ }
- const std::size_t coords_count{operation.GetOperandsCount()};
- expr += constructors.at(coords_count - 1);
- for (std::size_t i = 0; i < coords_count; ++i) {
- expr += VisitOperand(operation, i).AsInt();
- if (i + 1 < coords_count) {
- expr += ", ";
- }
- }
- expr += "), ";
+ Expression AtomicImageMin(Operation operation) {
+ return AtomicImage(operation, "imageAtomicMin");
+ }
- const std::size_t values_count{meta.values.size()};
- UNIMPLEMENTED_IF(values_count != 4);
- expr += "vec4(";
- for (std::size_t i = 0; i < values_count; ++i) {
- expr += Visit(meta.values.at(i)).AsFloat();
- if (i + 1 < values_count) {
- expr += ", ";
- }
- }
- expr += "));";
+ Expression AtomicImageMax(Operation operation) {
+ return AtomicImage(operation, "imageAtomicMax");
+ }
+ Expression AtomicImageAnd(Operation operation) {
+ return AtomicImage(operation, "imageAtomicAnd");
+ }
- code.AddLine(expr);
- return {};
+ Expression AtomicImageOr(Operation operation) {
+ return AtomicImage(operation, "imageAtomicOr");
+ }
+
+ Expression AtomicImageXor(Operation operation) {
+ return AtomicImage(operation, "imageAtomicXor");
+ }
+
+ Expression AtomicImageExchange(Operation operation) {
+ return AtomicImage(operation, "imageAtomicExchange");
}
Expression Branch(Operation operation) {
@@ -1846,8 +1957,7 @@ private:
Expression BallotThread(Operation operation) {
const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia warp intrinsics are not available and its required by a shader");
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
// Stub on non-Nvidia devices by simulating all threads voting the same as the active
// one.
return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
@@ -1858,8 +1968,7 @@ private:
Expression Vote(Operation operation, const char* func) {
const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia vote intrinsics are not available and its required by a shader");
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
// Stub with a warp size of one.
return {value, Type::Bool};
}
@@ -1876,15 +1985,54 @@ private:
Expression VoteEqual(Operation operation) {
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia vote intrinsics are not available and its required by a shader");
- // We must return true here since a stub for a theoretical warp size of 1 will always
- // return an equal result for all its votes.
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
+ // We must return true here: this is a stub for a theoretical warp size of 1, where
+ // every vote trivially yields an equal result.
return {"true", Type::Bool};
}
return Vote(operation, "allThreadsEqualNV");
}
+ // Emits a call to the shuffle function named by the template argument, with operand 0 as
+ // the float value and operands 1 and 2 as the unsigned index and width. Without warp
+ // intrinsics the value itself is returned and an error is logged.
+ template <const std::string_view& func>
+ Expression Shuffle(Operation operation) {
+     const std::string value = VisitOperand(operation, 0).AsFloat();
+     if (device.HasWarpIntrinsics()) {
+         const std::string index = VisitOperand(operation, 1).AsUint();
+         const std::string width = VisitOperand(operation, 2).AsUint();
+         return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
+     }
+
+     LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
+     // On a "single-thread" device we are either on the same thread or out of bounds. Both
+     // cases return the passed value.
+     return {value, Type::Float};
+ }
+
+ // Produces a boolean expression telling whether a shuffle with the given index (operand 0)
+ // and width (operand 1) is in range. With warp intrinsics, the named shuffle function is
+ // called with a temporary bool as its last argument and that temporary is returned.
+ template <const std::string_view& func>
+ Expression InRangeShuffle(Operation operation) {
+     // Both operands are visited before the device check, whether or not they end up used.
+     const std::string index = VisitOperand(operation, 0).AsUint();
+     const std::string width = VisitOperand(operation, 1).AsUint();
+     if (device.HasWarpIntrinsics()) {
+         const std::string in_range = code.GenerateTemporary();
+         code.AddLine("bool {};", in_range);
+         code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
+         return {in_range, Type::Bool};
+     }
+
+     // On a "single-thread" device we are only in bounds when the requested index is 0.
+     return {fmt::format("({} == 0U)", index), Type::Bool};
+ }
+
+ // Holder for the shuffle function names that are passed by reference as template arguments
+ // to Shuffle and InRangeShuffle. It only carries static constants, so construction and
+ // destruction are deleted.
+ // NOTE(review): the names look like the GL_NV_shader_thread_shuffle built-ins - confirm.
+ struct Func final {
+ Func() = delete;
+ ~Func() = delete;
+
+ static constexpr std::string_view ShuffleIndexed = "shuffleNV";
+ static constexpr std::string_view ShuffleUp = "shuffleUpNV";
+ static constexpr std::string_view ShuffleDown = "shuffleDownNV";
+ static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
+ };
+
static constexpr std::array operation_decompilers = {
&GLSLDecompiler::Assign,
@@ -2017,6 +2165,13 @@ private:
&GLSLDecompiler::TexelFetch,
&GLSLDecompiler::ImageStore,
+ &GLSLDecompiler::AtomicImageAdd,
+ &GLSLDecompiler::AtomicImageMin,
+ &GLSLDecompiler::AtomicImageMax,
+ &GLSLDecompiler::AtomicImageAnd,
+ &GLSLDecompiler::AtomicImageOr,
+ &GLSLDecompiler::AtomicImageXor,
+ &GLSLDecompiler::AtomicImageExchange,
&GLSLDecompiler::Branch,
&GLSLDecompiler::BranchIndirect,
@@ -2040,6 +2195,16 @@ private:
&GLSLDecompiler::VoteAll,
&GLSLDecompiler::VoteAny,
&GLSLDecompiler::VoteEqual,
+
+ &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleUp>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
+
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2080,6 +2245,10 @@ private:
return "lmem_" + suffix;
}
+ // Name of the shared memory array: "smem_" followed by the decompiler's suffix.
+ std::string GetSharedMemory() const {
+     return "smem_" + suffix;
+ }
+
std::string GetInternalFlag(InternalFlag flag) const {
constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
"overflow_flag"};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 969fe9ced..f141c4e3b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -341,13 +341,22 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u64 index{};
u32 type{};
u8 is_bindless{};
+ u8 is_written{};
+ u8 is_read{};
+ u8 is_size_known{};
+ u32 size{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
- !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) {
+ !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) ||
+ !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) ||
+ !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) {
return {};
}
entry.entries.images.emplace_back(
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
- static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0);
+ static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0,
+ is_read != 0,
+ is_size_known ? std::make_optional(static_cast<Tegra::Shader::ImageAtomicSize>(size))
+ : std::nullopt);
}
u32 global_memory_count{};
@@ -426,10 +435,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
return false;
}
for (const auto& image : entries.images) {
+ const u32 size = image.IsSizeKnown() ? static_cast<u32>(image.GetSize()) : 0U;
if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) ||
+ !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) ||
+ !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) ||
+ !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) {
return false;
}
}
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index f4777d0b0..bf86b5a0b 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -16,7 +16,6 @@ namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
OpenGLState OpenGLState::cur_state;
-bool OpenGLState::s_rgb_used;
namespace {
@@ -34,6 +33,25 @@ bool UpdateTie(T1 current_value, const T2 new_value) {
return changed;
}
+// Brings current_values in line with new_values element by element through UpdateValue and
+// reports the smallest index range covering every element that changed.
+// Returns std::nullopt when nothing changed, otherwise {first changed index, range length}.
+// Unchanged elements between the first and last change are included in the range.
+template <typename T>
+std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) {
+    std::optional<std::size_t> first;
+    // Value-initialized so it is never read indeterminate; it is only meaningful once
+    // `first` has been set.
+    std::size_t last{};
+    for (std::size_t i = 0; i < std::size(current_values); ++i) {
+        if (!UpdateValue(current_values[i], new_values[i])) {
+            continue;
+        }
+        if (!first) {
+            first = i;
+        }
+        last = i;
+    }
+    if (!first) {
+        return std::nullopt;
+    }
+    return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1));
+}
+
void Enable(GLenum cap, bool enable) {
if (enable) {
glEnable(cap);
@@ -134,10 +152,6 @@ OpenGLState::OpenGLState() {
logic_op.enabled = false;
logic_op.operation = GL_COPY;
- for (auto& texture_unit : texture_units) {
- texture_unit.Reset();
- }
-
draw.read_framebuffer = 0;
draw.draw_framebuffer = 0;
draw.vertex_array = 0;
@@ -267,8 +281,6 @@ void OpenGLState::ApplySRgb() const {
return;
cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
if (framebuffer_srgb.enabled) {
- // Track if sRGB is used
- s_rgb_used = true;
glEnable(GL_FRAMEBUFFER_SRGB);
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
@@ -496,52 +508,20 @@ void OpenGLState::ApplyAlphaTest() const {
}
void OpenGLState::ApplyTextures() const {
- bool has_delta{};
- std::size_t first{};
- std::size_t last{};
- std::array<GLuint, Maxwell::NumTextureSamplers> textures;
-
- for (std::size_t i = 0; i < std::size(texture_units); ++i) {
- const auto& texture_unit = texture_units[i];
- auto& cur_state_texture_unit = cur_state.texture_units[i];
- textures[i] = texture_unit.texture;
- if (cur_state_texture_unit.texture == textures[i]) {
- continue;
- }
- cur_state_texture_unit.texture = textures[i];
- if (!has_delta) {
- first = i;
- has_delta = true;
- }
- last = i;
- }
- if (has_delta) {
- glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
- textures.data() + first);
+ if (const auto update = UpdateArray(cur_state.textures, textures)) {
+ glBindTextures(update->first, update->second, textures.data() + update->first);
}
}
void OpenGLState::ApplySamplers() const {
- bool has_delta{};
- std::size_t first{};
- std::size_t last{};
- std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
-
- for (std::size_t i = 0; i < std::size(samplers); ++i) {
- samplers[i] = texture_units[i].sampler;
- if (cur_state.texture_units[i].sampler == texture_units[i].sampler) {
- continue;
- }
- cur_state.texture_units[i].sampler = texture_units[i].sampler;
- if (!has_delta) {
- first = i;
- has_delta = true;
- }
- last = i;
+ if (const auto update = UpdateArray(cur_state.samplers, samplers)) {
+ glBindSamplers(update->first, update->second, samplers.data() + update->first);
}
- if (has_delta) {
- glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
- samplers.data() + first);
+}
+
+void OpenGLState::ApplyImages() const {
+ if (const auto update = UpdateArray(cur_state.images, images)) {
+ glBindImageTextures(update->first, update->second, images.data() + update->first);
}
}
@@ -576,6 +556,7 @@ void OpenGLState::Apply() {
ApplyLogicOp();
ApplyTextures();
ApplySamplers();
+ ApplyImages();
if (dirty.polygon_offset) {
ApplyPolygonOffset();
dirty.polygon_offset = false;
@@ -606,18 +587,18 @@ void OpenGLState::EmulateViewportWithScissor() {
}
OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
- for (auto& unit : texture_units) {
- if (unit.texture == handle) {
- unit.Unbind();
+ for (auto& texture : textures) {
+ if (texture == handle) {
+ texture = 0;
}
}
return *this;
}
OpenGLState& OpenGLState::ResetSampler(GLuint handle) {
- for (auto& unit : texture_units) {
- if (unit.sampler == handle) {
- unit.sampler = 0;
+ for (auto& sampler : samplers) {
+ if (sampler == handle) {
+ sampler = 0;
}
}
return *this;
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index fdf9a8a12..c358d3b38 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -118,21 +118,9 @@ public:
GLenum operation;
} logic_op;
- // 3 texture units - one for each that is used in PICA fragment shader emulation
- struct TextureUnit {
- GLuint texture; // GL_TEXTURE_BINDING_2D
- GLuint sampler; // GL_SAMPLER_BINDING
-
- void Unbind() {
- texture = 0;
- }
-
- void Reset() {
- Unbind();
- sampler = 0;
- }
- };
- std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{};
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{};
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{};
struct {
GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
@@ -187,14 +175,6 @@ public:
return cur_state;
}
- static bool GetsRGBUsed() {
- return s_rgb_used;
- }
-
- static void ClearsRGBUsed() {
- s_rgb_used = false;
- }
-
void SetDefaultViewports();
/// Apply this state as the current OpenGL state
void Apply();
@@ -220,6 +200,7 @@ public:
void ApplyLogicOp() const;
void ApplyTextures() const;
void ApplySamplers() const;
+ void ApplyImages() const;
void ApplyDepthClamp() const;
void ApplyPolygonOffset() const;
void ApplyAlphaTest() const;
@@ -264,8 +245,6 @@ public:
private:
static OpenGLState cur_state;
- // Workaround for sRGB problems caused by QT not supporting srgb output
- static bool s_rgb_used;
struct {
bool blend_state;
bool stencil_state;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 21324488a..8e13ab38b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -78,6 +78,17 @@ public:
/// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
void Attach(GLenum attachment, GLenum target) const;
+ void ApplySwizzle(Tegra::Texture::SwizzleSource x_source,
+ Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source,
+ Tegra::Texture::SwizzleSource w_source);
+
+ void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);
+
+ void MarkAsModified(u64 tick) {
+ surface.MarkAsModified(true, tick);
+ }
+
GLuint GetTexture() const {
if (is_proxy) {
return surface.GetTexture();
@@ -89,13 +100,6 @@ public:
return surface.GetSurfaceParams();
}
- void ApplySwizzle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source);
-
- void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);
-
private:
u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
@@ -111,8 +115,8 @@ private:
GLenum target{};
OGLTextureView texture_view;
- u32 swizzle;
- bool is_proxy;
+ u32 swizzle{};
+ bool is_proxy{};
};
class TextureCacheOpenGL final : public TextureCacheBase {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index af9684839..1e6ef66ab 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -264,7 +264,6 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
- OpenGLState::ClearsRGBUsed();
rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
}
@@ -342,21 +341,17 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
}};
- state.texture_units[0].texture = screen_info.display_texture;
- // Workaround brigthness problems in SMO by enabling sRGB in the final output
- // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
- state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
+ state.textures[0] = screen_info.display_texture;
+ state.framebuffer_srgb.enabled = screen_info.display_srgb;
state.AllDirty();
state.Apply();
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore default state
state.framebuffer_srgb.enabled = false;
- state.texture_units[0].texture = 0;
+ state.textures[0] = 0;
state.AllDirty();
state.Apply();
- // Clear sRGB state for the next frame
- OpenGLState::ClearsRGBUsed();
}
/**
@@ -406,8 +401,8 @@ void RendererOpenGL::CaptureScreenshot() {
GLuint renderbuffer;
glGenRenderbuffers(1, &renderbuffer);
glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
- glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width,
- layout.height);
+ glRenderbufferStorage(GL_RENDERBUFFER, screen_info.display_srgb ? GL_SRGB8 : GL_RGB8,
+ layout.width, layout.height);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
DrawScreen(layout);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 9bd086368..cf26628ca 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -38,7 +38,8 @@ struct TextureInfo {
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
- GLuint display_texture;
+ GLuint display_texture{};
+ bool display_srgb{};
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture;
};