diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 141 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 19 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 249 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 89 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 29 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 3 |
10 files changed, 408 insertions, 181 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 01d89f47d..4dd08bccb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -331,7 +331,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); SetupDrawConstBuffers(stage_enum, shader); SetupDrawGlobalMemory(stage_enum, shader); - const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; + const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)}; const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); @@ -489,9 +489,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( // Assume that a surface will be written to if it is used as a framebuffer, even if // the shader doesn't actually write to it. texture_cache.MarkColorBufferInUse(*single_color_target); - // Workaround for and issue in nvidia drivers - // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; } fbkey.is_single_buffer = true; @@ -512,11 +509,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( // Assume that a surface will be written to if it is used as a framebuffer, even // if the shader doesn't actually write to it. texture_cache.MarkColorBufferInUse(index); - // Enable sRGB only for supported formats - // Workaround for and issue in nvidia drivers - // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ - state.framebuffer_srgb.enabled |= - color_surface->GetSurfaceParams().srgb_conversion; } fbkey.color_attachments[index] = @@ -801,7 +793,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { } auto kernel = shader_cache.GetComputeKernel(code_addr); - const auto [program, next_bindings] = kernel->GetProgramHandle({}); + ProgramVariant variant; + variant.texture_buffer_usage = SetupComputeTextures(kernel); + SetupComputeImages(kernel); + + const auto [program, next_bindings] = kernel->GetProgramHandle(variant); state.draw.shader_program = program; state.draw.program_pipeline = 0; @@ -816,13 +812,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { SetupComputeConstBuffers(kernel); SetupComputeGlobalMemory(kernel); - // TODO(Rodrigo): Bind images and samplers - buffer_cache.Unmap(); bind_ubo_pushbuffer.Bind(); bind_ssbo_pushbuffer.Bind(); + state.ApplyTextures(); + state.ApplyImages(); state.ApplyShaderProgram(); state.ApplyProgramPipeline(); @@ -902,6 +898,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, } screen_info.display_texture = surface->GetTexture(); + screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; return true; } @@ -922,7 +919,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { const auto& launch_desc = system.GPU().KeplerCompute().launch_description; for (const auto& entry : kernel->GetShaderEntries().const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; - const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value(); + const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); Tegra::Engines::ConstBufferInfo buffer; buffer.address = config.Address(); buffer.size = config.size; @@ -981,53 +978,125 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); } -TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, - BaseBindings base_bindings) { +TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, + const Shader& shader, + BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); const auto& entries = shader->GetShaderEntries().samplers; - ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units), + ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), "Exceeded the number of active textures."); TextureBufferUsage texture_buffer_usage{0}; for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry = entries[bindpoint]; - Tegra::Texture::FullTextureInfo texture; - if (entry.IsBindless()) { + const auto texture = [&]() { + if (!entry.IsBindless()) { + return maxwell3d.GetStageTexture(stage, entry.GetOffset()); + } const auto cbuf = entry.GetBindlessCBuf(); Tegra::Texture::TextureHandle tex_handle; tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); - texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); - } else { - texture = maxwell3d.GetStageTexture(stage, entry.GetOffset()); + return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); + }(); + + if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) { + texture_buffer_usage.set(bindpoint); } - const u32 current_bindpoint = base_bindings.sampler + bindpoint; + } - auto& unit{state.texture_units[current_bindpoint]}; - unit.sampler = sampler_cache.GetSampler(texture.tsc); + return texture_buffer_usage; +} - if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) { - if (view->GetSurfaceParams().IsBuffer()) { - // Record that this texture is a texture buffer. - texture_buffer_usage.set(bindpoint); - } else { - // Apply swizzle to textures that are not buffers. - view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, - texture.tic.w_source); +TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { + MICROPROFILE_SCOPE(OpenGL_Texture); + const auto& compute = system.GPU().KeplerCompute(); + const auto& entries = kernel->GetShaderEntries().samplers; + + ASSERT_MSG(entries.size() <= std::size(state.textures), + "Exceeded the number of active textures."); + + TextureBufferUsage texture_buffer_usage{0}; + + for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto& entry = entries[bindpoint]; + const auto texture = [&]() { + if (!entry.IsBindless()) { + return compute.GetTexture(entry.GetOffset()); } - state.texture_units[current_bindpoint].texture = view->GetTexture(); - } else { - // Can occur when texture addr is null or its memory is unmapped/invalid - unit.texture = 0; + const auto cbuf = entry.GetBindlessCBuf(); + Tegra::Texture::TextureHandle tex_handle; + tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); + return compute.GetTextureInfo(tex_handle, entry.GetOffset()); + }(); + + if (SetupTexture(bindpoint, texture, entry)) { + texture_buffer_usage.set(bindpoint); } } return texture_buffer_usage; } +bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, + const GLShader::SamplerEntry& entry) { + state.samplers[binding] = sampler_cache.GetSampler(texture.tsc); + + const auto view = texture_cache.GetTextureSurface(texture.tic, entry); + if (!view) { + // Can occur when texture addr is null or its memory is unmapped/invalid + state.textures[binding] = 0; + return false; + } + state.textures[binding] = view->GetTexture(); + + if (view->GetSurfaceParams().IsBuffer()) { + return true; + } + + // Apply swizzle to textures that are not buffers. + view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, + texture.tic.w_source); + return false; +} + +void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { + const auto& compute = system.GPU().KeplerCompute(); + const auto& entries = shader->GetShaderEntries().images; + for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto& entry = entries[bindpoint]; + const auto tic = [&]() { + if (!entry.IsBindless()) { + return compute.GetTexture(entry.GetOffset()).tic; + } + const auto cbuf = entry.GetBindlessCBuf(); + Tegra::Texture::TextureHandle tex_handle; + tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); + return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; + }(); + SetupImage(bindpoint, tic, entry); + } +} + +void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, + const GLShader::ImageEntry& entry) { + const auto view = texture_cache.GetImageSurface(tic, entry); + if (!view) { + state.images[binding] = 0; + return; + } + if (!tic.IsBuffer()) { + view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); + } + if (entry.IsWritten()) { + view->MarkAsModified(texture_cache.Tick()); + } + state.images[binding] = view->GetTexture(); +} + void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { const auto& regs = system.GPU().Maxwell3D().regs; const bool geometry_shaders_enabled = diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9d20a4fbf..eada752e0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -32,6 +32,7 @@ #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/textures/texture.h" namespace Core { class System; @@ -137,8 +138,22 @@ private: /// Configures the current textures to use for the draw command. Returns shaders texture buffer /// usage. - TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, BaseBindings base_bindings); + TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + const Shader& shader, BaseBindings base_bindings); + + /// Configures the textures used in a compute shader. Returns texture buffer usage. + TextureBufferUsage SetupComputeTextures(const Shader& kernel); + + /// Configures a texture. Returns true when the texture is a texture buffer. + bool SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, + const GLShader::SamplerEntry& entry); + + /// Configures images in a compute shader. + void SetupComputeImages(const Shader& shader); + + /// Configures an image. + void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, + const GLShader::ImageEntry& entry); /// Syncs the viewport and depth range to match the guest state void SyncViewport(OpenGLState& current_state); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 909ccb82c..0dbc4c02f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,7 +214,8 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn std::string source = "#version 430 core\n" "#extension GL_ARB_separate_shader_objects : enable\n" "#extension GL_NV_gpu_shader5 : enable\n" - "#extension GL_NV_shader_thread_group : enable\n"; + "#extension GL_NV_shader_thread_group : enable\n" + "#extension GL_NV_shader_thread_shuffle : enable\n"; if (entries.shader_viewport_layer_array) { source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 4a8c7edc9..76439e7ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -325,6 +325,7 @@ public: DeclareRegisters(); DeclarePredicates(); DeclareLocalMemory(); + DeclareSharedMemory(); DeclareInternalFlags(); DeclareInputAttributes(); DeclareOutputAttributes(); @@ -389,11 +390,10 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } - for (const auto& image : ir.GetImages()) { + for (const auto& [offset, image] : ir.GetImages()) { entries.images.emplace_back(image); } - for (const auto& gmem_pair : ir.GetGlobalMemory()) { - const auto& [base, usage] = gmem_pair; + for (const auto& [base, usage] : ir.GetGlobalMemory()) { entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, usage.is_written); } @@ -500,6 +500,13 @@ private: code.AddNewLine(); } + void DeclareSharedMemory() { + if (stage != ProgramType::Compute) { + return; + } + code.AddLine("shared uint {}[];", GetSharedMemory()); + } + void DeclareInternalFlags() { for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { const auto flag_code = static_cast<InternalFlag>(flag); @@ -706,8 +713,8 @@ private: void DeclareImages() { const auto& images{ir.GetImages()}; - for (const auto& image : images) { - const std::string image_type = [&]() { + for (const auto& [offset, image] : images) { + const char* image_type = [&] { switch (image.GetType()) { case Tegra::Shader::ImageType::Texture1D: return "image1D"; @@ -726,9 +733,33 @@ private: return "image1D"; } }(); - code.AddLine("layout (binding = IMAGE_BINDING_{}) coherent volatile writeonly uniform " + + const auto [type_prefix, format] = [&]() -> std::pair<const char*, const char*> { + if (!image.IsSizeKnown()) { + return {"", ""}; + } + switch (image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return {"u", "r32ui, "}; + case Tegra::Shader::ImageAtomicSize::S32: + return {"i", "r32i, "}; + default: + UNIMPLEMENTED_MSG("Unimplemented atomic size={}", + static_cast<u32>(image.GetSize())); + return {"", ""}; + } + }(); + + std::string qualifier = "coherent volatile"; + if (image.IsRead() && !image.IsWritten()) { + qualifier += " readonly"; + } else if (image.IsWritten() && !image.IsRead()) { + qualifier += " writeonly"; + } + + code.AddLine("layout (binding = IMAGE_BINDING_{}) {} uniform " "{} {};", - image.GetIndex(), image_type, GetImage(image)); + image.GetIndex(), qualifier, image_type, GetImage(image)); } if (!images.empty()) { code.AddNewLine(); @@ -858,6 +889,12 @@ private: Type::Uint}; } + if (const auto smem = std::get_if<SmemNode>(&*node)) { + return { + fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), + Type::Uint}; + } + if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool}; } @@ -1174,6 +1211,74 @@ private: return expr; } + std::string BuildIntegerCoordinates(Operation operation) { + constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + const std::size_t coords_count{operation.GetOperandsCount()}; + std::string expr = constructors.at(coords_count - 1); + for (std::size_t i = 0; i < coords_count; ++i) { + expr += VisitOperand(operation, i).AsInt(); + if (i + 1 < coords_count) { + expr += ", "; + } + } + expr += ')'; + return expr; + } + + std::string BuildImageValues(Operation operation) { + const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto [constructors, type] = [&]() -> std::pair<std::array<const char*, 4>, Type> { + constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"}; + if (!meta.image.IsSizeKnown()) { + return {float_constructors, Type::Float}; + } + switch (meta.image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint}; + case Tegra::Shader::ImageAtomicSize::S32: + return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Uint}; + default: + UNIMPLEMENTED_MSG("Unimplemented image size={}", + static_cast<u32>(meta.image.GetSize())); + return {float_constructors, Type::Float}; + } + }(); + + const std::size_t values_count{meta.values.size()}; + std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); + for (std::size_t i = 0; i < values_count; ++i) { + expr += Visit(meta.values.at(i)).As(type); + if (i + 1 < values_count) { + expr += ", "; + } + } + expr += ')'; + return expr; + } + + Expression AtomicImage(Operation operation, const char* opname) { + constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + const auto meta{std::get<MetaImage>(operation.GetMeta())}; + ASSERT(meta.values.size() == 1); + ASSERT(meta.image.IsSizeKnown()); + + const auto type = [&]() { + switch (const auto size = meta.image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return Type::Uint; + case Tegra::Shader::ImageAtomicSize::S32: + return Type::Int; + default: + UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast<u32>(size)); + return Type::Uint; + } + }(); + + return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image), + BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)), + type}; + } + Expression Assign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; @@ -1199,6 +1304,11 @@ private: target = { fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), Type::Uint}; + } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { + ASSERT(stage == ProgramType::Compute); + target = { + fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), + Type::Uint}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { const std::string real = Visit(gmem->GetRealAddress()).AsUint(); const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); @@ -1692,36 +1802,37 @@ private: } Expression ImageStore(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; const auto meta{std::get<MetaImage>(operation.GetMeta())}; + code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), + BuildIntegerCoordinates(operation), BuildImageValues(operation)); + return {}; + } - std::string expr = "imageStore("; - expr += GetImage(meta.image); - expr += ", "; + Expression AtomicImageAdd(Operation operation) { + return AtomicImage(operation, "imageAtomicAdd"); + } - const std::size_t coords_count{operation.GetOperandsCount()}; - expr += constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += "), "; + Expression AtomicImageMin(Operation operation) { + return AtomicImage(operation, "imageAtomicMin"); + } - const std::size_t values_count{meta.values.size()}; - UNIMPLEMENTED_IF(values_count != 4); - expr += "vec4("; - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsFloat(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += "));"; + Expression AtomicImageMax(Operation operation) { + return AtomicImage(operation, "imageAtomicMax"); + } + Expression AtomicImageAnd(Operation operation) { + return AtomicImage(operation, "imageAtomicAnd"); + } - code.AddLine(expr); - return {}; + Expression AtomicImageOr(Operation operation) { + return AtomicImage(operation, "imageAtomicOr"); + } + + Expression AtomicImageXor(Operation operation) { + return AtomicImage(operation, "imageAtomicXor"); + } + + Expression AtomicImageExchange(Operation operation) { + return AtomicImage(operation, "imageAtomicExchange"); } Expression Branch(Operation operation) { @@ -1846,8 +1957,7 @@ private: Expression BallotThread(Operation operation) { const std::string value = VisitOperand(operation, 0).AsBool(); if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia warp intrinsics are not available and its required by a shader"); + LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); // Stub on non-Nvidia devices by simulating all threads voting the same as the active // one. return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint}; @@ -1858,8 +1968,7 @@ private: Expression Vote(Operation operation, const char* func) { const std::string value = VisitOperand(operation, 0).AsBool(); if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia vote intrinsics are not available and its required by a shader"); + LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); // Stub with a warp size of one. return {value, Type::Bool}; } @@ -1876,15 +1985,54 @@ private: Expression VoteEqual(Operation operation) { if (!device.HasWarpIntrinsics()) { - LOG_ERROR(Render_OpenGL, - "Nvidia vote intrinsics are not available and its required by a shader"); - // We must return true here since a stub for a theoretical warp size of 1 will always - // return an equal result for all its votes. + LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader"); + // We must return true here since a stub for a theoretical warp size of 1. + // This will always return an equal result across all votes. return {"true", Type::Bool}; } return Vote(operation, "allThreadsEqualNV"); } + template <const std::string_view& func> + Expression Shuffle(Operation operation) { + const std::string value = VisitOperand(operation, 0).AsFloat(); + if (!device.HasWarpIntrinsics()) { + LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); + // On a "single-thread" device we are either on the same thread or out of bounds. Both + // cases return the passed value. + return {value, Type::Float}; + } + + const std::string index = VisitOperand(operation, 1).AsUint(); + const std::string width = VisitOperand(operation, 2).AsUint(); + return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; + } + + template <const std::string_view& func> + Expression InRangeShuffle(Operation operation) { + const std::string index = VisitOperand(operation, 0).AsUint(); + const std::string width = VisitOperand(operation, 1).AsUint(); + if (!device.HasWarpIntrinsics()) { + // On a "single-thread" device we are only in bounds when the requested index is 0. + return {fmt::format("({} == 0U)", index), Type::Bool}; + } + + const std::string in_range = code.GenerateTemporary(); + code.AddLine("bool {};", in_range); + code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); + return {in_range, Type::Bool}; + } + + struct Func final { + Func() = delete; + ~Func() = delete; + + static constexpr std::string_view ShuffleIndexed = "shuffleNV"; + static constexpr std::string_view ShuffleUp = "shuffleUpNV"; + static constexpr std::string_view ShuffleDown = "shuffleDownNV"; + static constexpr std::string_view ShuffleButterfly = "shuffleXorNV"; + }; + static constexpr std::array operation_decompilers = { &GLSLDecompiler::Assign, @@ -2017,6 +2165,13 @@ private: &GLSLDecompiler::TexelFetch, &GLSLDecompiler::ImageStore, + &GLSLDecompiler::AtomicImageAdd, + &GLSLDecompiler::AtomicImageMin, + &GLSLDecompiler::AtomicImageMax, + &GLSLDecompiler::AtomicImageAnd, + &GLSLDecompiler::AtomicImageOr, + &GLSLDecompiler::AtomicImageXor, + &GLSLDecompiler::AtomicImageExchange, &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, @@ -2040,6 +2195,16 @@ private: &GLSLDecompiler::VoteAll, &GLSLDecompiler::VoteAny, &GLSLDecompiler::VoteEqual, + + &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, + &GLSLDecompiler::Shuffle<Func::ShuffleUp>, + &GLSLDecompiler::Shuffle<Func::ShuffleDown>, + &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, + + &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, + &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, }; static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); @@ -2080,6 +2245,10 @@ private: return "lmem_" + suffix; } + std::string GetSharedMemory() const { + return fmt::format("smem_{}", suffix); + } + std::string GetInternalFlag(InternalFlag flag) const { constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", "overflow_flag"}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 969fe9ced..f141c4e3b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -341,13 +341,22 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn u64 index{}; u32 type{}; u8 is_bindless{}; + u8 is_written{}; + u8 is_read{}; + u8 is_size_known{}; + u32 size{}; if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || - !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) { + !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || + !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || + !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) { return {}; } entry.entries.images.emplace_back( static_cast<std::size_t>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0); + static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, + is_read != 0, + is_size_known ? std::make_optional(static_cast<Tegra::Shader::ImageAtomicSize>(size)) + : std::nullopt); } u32 global_memory_count{}; @@ -426,10 +435,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: return false; } for (const auto& image : entries.images) { + const u32 size = image.IsSizeKnown() ? static_cast<u32>(image.GetSize()) : 0U; if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) { + !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || + !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || + !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || + !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) { return false; } } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index f4777d0b0..bf86b5a0b 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -16,7 +16,6 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; OpenGLState OpenGLState::cur_state; -bool OpenGLState::s_rgb_used; namespace { @@ -34,6 +33,25 @@ bool UpdateTie(T1 current_value, const T2 new_value) { return changed; } +template <typename T> +std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) { + std::optional<std::size_t> first; + std::size_t last; + for (std::size_t i = 0; i < std::size(current_values); ++i) { + if (!UpdateValue(current_values[i], new_values[i])) { + continue; + } + if (!first) { + first = i; + } + last = i; + } + if (!first) { + return std::nullopt; + } + return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1)); +} + void Enable(GLenum cap, bool enable) { if (enable) { glEnable(cap); @@ -134,10 +152,6 @@ OpenGLState::OpenGLState() { logic_op.enabled = false; logic_op.operation = GL_COPY; - for (auto& texture_unit : texture_units) { - texture_unit.Reset(); - } - draw.read_framebuffer = 0; draw.draw_framebuffer = 0; draw.vertex_array = 0; @@ -267,8 +281,6 @@ void OpenGLState::ApplySRgb() const { return; cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled; if (framebuffer_srgb.enabled) { - // Track if sRGB is used - s_rgb_used = true; glEnable(GL_FRAMEBUFFER_SRGB); } else { glDisable(GL_FRAMEBUFFER_SRGB); @@ -496,52 +508,20 @@ void OpenGLState::ApplyAlphaTest() const { } void OpenGLState::ApplyTextures() const { - bool has_delta{}; - std::size_t first{}; - std::size_t last{}; - std::array<GLuint, Maxwell::NumTextureSamplers> textures; - - for (std::size_t i = 0; i < std::size(texture_units); ++i) { - const auto& texture_unit = texture_units[i]; - auto& cur_state_texture_unit = cur_state.texture_units[i]; - textures[i] = texture_unit.texture; - if (cur_state_texture_unit.texture == textures[i]) { - continue; - } - cur_state_texture_unit.texture = textures[i]; - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; - } - if (has_delta) { - glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), - textures.data() + first); + if (const auto update = UpdateArray(cur_state.textures, textures)) { + glBindTextures(update->first, update->second, textures.data() + update->first); } } void OpenGLState::ApplySamplers() const { - bool has_delta{}; - std::size_t first{}; - std::size_t last{}; - std::array<GLuint, Maxwell::NumTextureSamplers> samplers; - - for (std::size_t i = 0; i < std::size(samplers); ++i) { - samplers[i] = texture_units[i].sampler; - if (cur_state.texture_units[i].sampler == texture_units[i].sampler) { - continue; - } - cur_state.texture_units[i].sampler = texture_units[i].sampler; - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; + if (const auto update = UpdateArray(cur_state.samplers, samplers)) { + glBindSamplers(update->first, update->second, samplers.data() + update->first); } - if (has_delta) { - glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), - samplers.data() + first); +} + +void OpenGLState::ApplyImages() const { + if (const auto update = UpdateArray(cur_state.images, images)) { + glBindImageTextures(update->first, update->second, images.data() + update->first); } } @@ -576,6 +556,7 @@ void OpenGLState::Apply() { ApplyLogicOp(); ApplyTextures(); ApplySamplers(); + ApplyImages(); if (dirty.polygon_offset) { ApplyPolygonOffset(); dirty.polygon_offset = false; @@ -606,18 +587,18 @@ void OpenGLState::EmulateViewportWithScissor() { } OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { - for (auto& unit : texture_units) { - if (unit.texture == handle) { - unit.Unbind(); + for (auto& texture : textures) { + if (texture == handle) { + texture = 0; } } return *this; } OpenGLState& OpenGLState::ResetSampler(GLuint handle) { - for (auto& unit : texture_units) { - if (unit.sampler == handle) { - unit.sampler = 0; + for (auto& sampler : samplers) { + if (sampler == handle) { + sampler = 0; } } return *this; diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index fdf9a8a12..c358d3b38 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -118,21 +118,9 @@ public: GLenum operation; } logic_op; - // 3 texture units - one for each that is used in PICA fragment shader emulation - struct TextureUnit { - GLuint texture; // GL_TEXTURE_BINDING_2D - GLuint sampler; // GL_SAMPLER_BINDING - - void Unbind() { - texture = 0; - } - - void Reset() { - Unbind(); - sampler = 0; - } - }; - std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{}; struct { GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING @@ -187,14 +175,6 @@ public: return cur_state; } - static bool GetsRGBUsed() { - return s_rgb_used; - } - - static void ClearsRGBUsed() { - s_rgb_used = false; - } - void SetDefaultViewports(); /// Apply this state as the current OpenGL state void Apply(); @@ -220,6 +200,7 @@ public: void ApplyLogicOp() const; void ApplyTextures() const; void ApplySamplers() const; + void ApplyImages() const; void ApplyDepthClamp() const; void ApplyPolygonOffset() const; void ApplyAlphaTest() const; @@ -264,8 +245,6 @@ public: private: static OpenGLState cur_state; - // Workaround for sRGB problems caused by QT not supporting srgb output - static bool s_rgb_used; struct { bool blend_state; bool stencil_state; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 21324488a..8e13ab38b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -78,6 +78,17 @@ public: /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER void Attach(GLenum attachment, GLenum target) const; + void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source); + + void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); + + void MarkAsModified(u64 tick) { + surface.MarkAsModified(true, tick); + } + GLuint GetTexture() const { if (is_proxy) { return surface.GetTexture(); @@ -89,13 +100,6 @@ public: return surface.GetSurfaceParams(); } - void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, - Tegra::Texture::SwizzleSource y_source, - Tegra::Texture::SwizzleSource z_source, - Tegra::Texture::SwizzleSource w_source); - - void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); - private: u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, @@ -111,8 +115,8 @@ private: GLenum target{}; OGLTextureView texture_view; - u32 swizzle; - bool is_proxy; + u32 swizzle{}; + bool is_proxy{}; }; class TextureCacheOpenGL final : public TextureCacheBase { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index af9684839..1e6ef66ab 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -264,7 +264,6 @@ void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; } - OpenGLState::ClearsRGBUsed(); rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); } @@ -342,21 +341,17 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), }}; - state.texture_units[0].texture = screen_info.display_texture; - // Workaround brigthness problems in SMO by enabling sRGB in the final output - // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 - state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); + state.textures[0] = screen_info.display_texture; + state.framebuffer_srgb.enabled = screen_info.display_srgb; state.AllDirty(); state.Apply(); glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); // Restore default state state.framebuffer_srgb.enabled = false; - state.texture_units[0].texture = 0; + state.textures[0] = 0; state.AllDirty(); state.Apply(); - // Clear sRGB state for the next frame - OpenGLState::ClearsRGBUsed(); } /** @@ -406,8 +401,8 @@ void RendererOpenGL::CaptureScreenshot() { GLuint renderbuffer; glGenRenderbuffers(1, &renderbuffer); glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); - glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width, - layout.height); + glRenderbufferStorage(GL_RENDERBUFFER, screen_info.display_srgb ? GL_SRGB8 : GL_RGB8, + layout.width, layout.height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); DrawScreen(layout); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 9bd086368..cf26628ca 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -38,7 +38,8 @@ struct TextureInfo { /// Structure used for storing information about the display target for the Switch screen struct ScreenInfo { - GLuint display_texture; + GLuint display_texture{}; + bool display_srgb{}; const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; }; |
