diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
23 files changed, 731 insertions, 600 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index fd091c84c..25652e794 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -7,7 +7,7 @@ #include "common/alignment.h" #include "core/core.h" -#include "core/memory.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index da9326253..ea4a593af 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -4,9 +4,9 @@ #include <glad/glad.h> -#include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_global_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" @@ -14,28 +14,28 @@ namespace OpenGL { -CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} { +CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, + max_size{max_size} { buffer.Create(); - // Bind and unbind the buffer so it gets allocated by the driver - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); } -void CachedGlobalRegion::Reload(u32 size_) { - constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize); +CachedGlobalRegion::~CachedGlobalRegion() = default; +void CachedGlobalRegion::Reload(u32 size_) { size = size_; if (size > max_size) { size = max_size; - LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_, + LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, max_size); } + glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); +} - // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); +void CachedGlobalRegion::Flush() { + LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); + glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); } GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { @@ -46,14 +46,16 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, return search->second; } -GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size, - u8* host_ptr) { +GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, + u32 size) { GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; if (!region) { // No reserved surface available, create a new one and reserve it auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr); - region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr); + const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; + ASSERT(cpu_addr); + + region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); ReserveGlobalRegion(region); } region->Reload(size); @@ -65,7 +67,11 @@ void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { } GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer} {} + : RasterizerCache{rasterizer} { + GLint max_ssbo_size_; + glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); + max_ssbo_size = static_cast<u32>(max_ssbo_size_); +} GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( const GLShader::GlobalMemoryEntry& global_region, @@ -73,7 +79,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( auto& gpu{Core::System::GetInstance().GPU()}; auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()}; const auto actual_addr{memory_manager.Read<u64>(addr)}; @@ -85,7 +91,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( if (!region) { // No global region found - create a new one - region = GetUncachedGlobalRegion(actual_addr, size, host_ptr); + region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); Register(region); } diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 5a21ab66f..196e6e278 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h @@ -19,7 +19,7 @@ namespace OpenGL { namespace GLShader { class GlobalMemoryEntry; -} // namespace GLShader +} class RasterizerOpenGL; class CachedGlobalRegion; @@ -27,7 +27,8 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; class CachedGlobalRegion final : public RasterizerCacheObject { public: - explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr); + explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); + ~CachedGlobalRegion(); VAddr GetCpuAddr() const override { return cpu_addr; @@ -45,14 +46,14 @@ public: /// Reloads the global region from guest memory void Reload(u32 size_); - // TODO(Rodrigo): When global memory is written (STG), implement flushing - void Flush() override { - UNIMPLEMENTED(); - } + void Flush() override; private: VAddr cpu_addr{}; + u8* host_ptr{}; u32 size{}; + u32 max_size{}; + OGLBuffer buffer; }; @@ -66,10 +67,11 @@ public: private: GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; - GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr); + GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); void ReserveGlobalRegion(GlobalRegion region); std::unordered_map<CacheAddr, GlobalRegion> reserve; + u32 max_ssbo_size{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp index 2bcbd3da2..c3e94d917 100644 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp @@ -7,7 +7,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "core/core.h" -#include "core/memory.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_primitive_assembler.h" diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h index 0e2e7dc36..4e87ce4d6 100644 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.h +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h @@ -4,11 +4,9 @@ #pragma once -#include <vector> #include <glad/glad.h> #include "common/common_types.h" -#include "video_core/memory_manager.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 046fc935b..0b1fe3494 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -17,7 +17,6 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" @@ -26,7 +25,6 @@ #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" -#include "video_core/video_core.h" namespace OpenGL { @@ -301,6 +299,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { BaseBindings base_bindings; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; + // Prepare packed bindings + bind_ubo_pushbuffer.Setup(base_bindings.cbuf); + bind_ssbo_pushbuffer.Setup(base_bindings.gmem); + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; @@ -318,13 +320,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 GLShader::MaxwellUniformData ubo{}; - ubo.SetFromRegs(gpu.state.shader_stages[stage]); + ubo.SetFromRegs(gpu, stage); const GLintptr offset = buffer_cache.UploadHostMemory( &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); // Bind the emulation info buffer - glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset, - static_cast<GLsizeiptr>(sizeof(ubo))); + bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, + static_cast<GLsizeiptr>(sizeof(ubo))); Shader shader{shader_cache.GetStageProgram(program)}; const auto [program_handle, next_bindings] = @@ -368,6 +370,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { base_bindings = next_bindings; } + bind_ubo_pushbuffer.Bind(); + bind_ssbo_pushbuffer.Bind(); + SyncClipEnabled(clip_distances); gpu.dirty_flags.shaders = false; @@ -577,9 +582,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( } void RasterizerOpenGL::Clear() { - const auto prev_state{state}; - SCOPE_EXIT({ prev_state.Apply(); }); - const auto& regs = system.GPU().Maxwell3D().regs; bool use_color{}; bool use_depth{}; @@ -651,7 +653,10 @@ void RasterizerOpenGL::Clear() { clear_state.EmulateViewportWithScissor(); } - clear_state.Apply(); + clear_state.ApplyColorMask(); + clear_state.ApplyDepth(); + clear_state.ApplyStencilTest(); + clear_state.ApplyViewport(); if (use_color) { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); @@ -751,6 +756,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { return; } res_cache.FlushRegion(addr, size); + global_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -902,23 +908,14 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; const auto& entries = shader->GetShaderEntries().const_buffers; - constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; - std::array<GLuint, max_binds> bind_buffers; - std::array<GLintptr, max_binds> bind_offsets; - std::array<GLsizeiptr, max_binds> bind_sizes; - - ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points."); - // Upload only the enabled buffers from the 16 constbuffers of each shader stage for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& used_buffer = entries[bindpoint]; const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; if (!buffer.enabled) { - // With disabled buffers set values as zero to unbind them - bind_buffers[bindpoint] = 0; - bind_offsets[bindpoint] = 0; - bind_sizes[bindpoint] = 0; + // Set values to zero to unbind buffers + bind_ubo_pushbuffer.Push(0, 0, 0); continue; } @@ -946,30 +943,22 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader const GLintptr const_buffer_offset = buffer_cache.UploadMemory( buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); - // Prepare values for multibind - bind_buffers[bindpoint] = buffer_cache.GetHandle(); - bind_offsets[bindpoint] = const_buffer_offset; - bind_sizes[bindpoint] = size; + bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size); } - - // The first binding is reserved for emulation values - const GLuint ubo_base_binding = base_bindings.cbuf + 1; - glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()), - bind_buffers.data(), bind_offsets.data(), bind_sizes.data()); } void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, GLenum primitive_mode, BaseBindings base_bindings) { - // TODO(Rodrigo): Use ARB_multi_bind here const auto& entries = shader->GetShaderEntries().global_memory_entries; - - for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) { - const auto& entry = entries[bindpoint]; - const u32 current_bindpoint = base_bindings.gmem + bindpoint; - const auto& region = global_cache.GetGlobalRegion(entry, stage); - - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle()); + for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { + const auto& entry{entries[bindpoint]}; + const auto& region{global_cache.GetGlobalRegion(entry, stage)}; + if (entry.IsWritten()) { + region->MarkAsModified(true, global_cache); + } + bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, + static_cast<GLsizeiptr>(region->GetSizeInBytes())); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4de565321..d4c2cf80e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -12,15 +12,12 @@ #include <optional> #include <tuple> #include <utility> -#include <vector> #include <boost/icl/interval_map.hpp> -#include <boost/range/iterator_range.hpp> #include <glad/glad.h> #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" @@ -29,10 +26,9 @@ #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" +#include "video_core/renderer_opengl/utils.h" namespace Core { class System; @@ -75,10 +71,6 @@ public: static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - static constexpr std::size_t MaxGlobalMemorySize = 0x10000; - static_assert(MaxGlobalMemorySize % sizeof(float) == 0, - "The maximum size of a global memory must be a multiple of the size of float"); - private: class SamplerInfo { public: @@ -234,6 +226,9 @@ private: PrimitiveAssembler primitive_assembler{buffer_cache}; GLint uniform_buffer_alignment; + BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; + BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; + std::size_t CalculateVertexArraysSize() const; std::size_t CalculateIndexBufferSize() const; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index aba6ce731..7a68b8738 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -13,9 +13,9 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/hle/kernel/process.h" -#include "core/memory.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" @@ -112,11 +112,26 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), params.srgb_conversion); - if (params.pixel_format == PixelFormat::R16U && config.tsc.depth_compare_enabled) { + if (config.tsc.depth_compare_enabled) { // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled, // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also // causes GetFormatType to properly return 'Depth' below). - params.pixel_format = PixelFormat::Z16; + if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) { + switch (params.pixel_format) { + case PixelFormat::R16S: + case PixelFormat::R16U: + case PixelFormat::R16F: + params.pixel_format = PixelFormat::Z16; + break; + case PixelFormat::R32F: + params.pixel_format = PixelFormat::Z32F; + break; + default: + LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}", + static_cast<u32>(params.pixel_format)); + break; + } + } } params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); @@ -266,6 +281,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); params.width = config.width; + params.pitch = config.pitch; params.height = config.height; params.unaligned_height = config.height; params.target = SurfaceTarget::Texture2D; @@ -662,8 +678,8 @@ void CachedSurface::FlushGLBuffer() { gl_buffer[0].resize(GetSizeInBytes()); const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); - // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0); + const u32 align = std::clamp(params.RowAlign(0), 1U, 8U); + glPixelStorei(GL_PACK_ALIGNMENT, align); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); ASSERT(!tuple.compressed); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); @@ -708,8 +724,8 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); - // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0); + const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U); + glPixelStorei(GL_UNPACK_ALIGNMENT, align); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map))); const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false)); @@ -1175,10 +1191,16 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, return new_surface; } + const bool old_compressed = + GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed; + const bool new_compressed = + GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed; + const bool compatible_formats = + GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) && + !(old_compressed || new_compressed); // For compatible surfaces, we can just do fast glCopyImageSubData based copy - if (old_params.target == new_params.target && old_params.type == new_params.type && - old_params.depth == new_params.depth && old_params.depth == 1 && - GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) { + if (old_params.target == new_params.target && old_params.depth == new_params.depth && + old_params.depth == 1 && compatible_formats) { FastCopySurface(old_surface, new_surface); return new_surface; } @@ -1193,7 +1215,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, case SurfaceTarget::TextureCubemap: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - if (old_params.pixel_format == new_params.pixel_format) + if (compatible_formats) FastLayeredCopySurface(old_surface, new_surface); else { AccurateCopySurface(old_surface, new_surface); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index e8073579f..db280dbb3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -5,13 +5,13 @@ #pragma once #include <array> -#include <map> #include <memory> #include <string> -#include <unordered_set> +#include <tuple> #include <vector> #include "common/alignment.h" +#include "common/bit_util.h" #include "common/common_types.h" #include "common/hash.h" #include "common/math_util.h" @@ -206,6 +206,13 @@ struct SurfaceParams { return bd; } + u32 RowAlign(u32 mip_level) const { + const u32 m_width = MipWidth(mip_level); + const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format); + const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel); + return (1U << l2); + } + /// Creates SurfaceParams from a texture configuration static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 290e654bc..99f67494c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -6,13 +6,12 @@ #include "common/assert.h" #include "common/hash.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/utils.h" #include "video_core/shader/shader_ir.h" @@ -41,6 +40,10 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { /// Gets the shader program code from memory for the specified address ProgramCode GetShaderCode(const u8* host_ptr) { ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); + ASSERT_OR_EXECUTE(host_ptr != nullptr, { + std::fill(program_code.begin(), program_code.end(), 0); + return program_code; + }); std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64)); return program_code; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index fd1c85115..0cf8e0b3d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,21 +5,20 @@ #pragma once #include <array> +#include <atomic> #include <memory> #include <set> #include <tuple> #include <unordered_map> +#include <vector> #include <glad/glad.h> -#include "common/assert.h" #include "common/common_types.h" #include "video_core/rasterizer_cache.h" -#include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" namespace Core { class System; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 11d1169f0..445048daf 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -21,6 +21,8 @@ namespace OpenGL::GLShader { +namespace { + using Tegra::Shader::Attribute; using Tegra::Shader::AttributeUse; using Tegra::Shader::Header; @@ -34,13 +36,15 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; using Operation = const OperationNode&; +enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; + +struct TextureAoffi {}; +using TextureArgument = std::pair<Type, Node>; +using TextureIR = std::variant<TextureAoffi, TextureArgument>; + enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); -constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = - static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); - -enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; class ShaderWriter { public: @@ -69,10 +73,10 @@ public: shader_source += '\n'; } - std::string GenerateTemporal() { - std::string temporal = "tmp"; - temporal += std::to_string(temporal_index++); - return temporal; + std::string GenerateTemporary() { + std::string temporary = "tmp"; + temporary += std::to_string(temporary_index++); + return temporary; } std::string GetResult() { @@ -87,11 +91,11 @@ private: } std::string shader_source; - u32 temporal_index = 1; + u32 temporary_index = 1; }; /// Generates code to use for a swizzle operation. -static std::string GetSwizzle(u32 elem) { +std::string GetSwizzle(u32 elem) { ASSERT(elem <= 3); std::string swizzle = "."; swizzle += "xyzw"[elem]; @@ -99,7 +103,7 @@ static std::string GetSwizzle(u32 elem) { } /// Translate topology -static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { +std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { switch (topology) { case Tegra::Shader::OutputTopology::PointList: return "points"; @@ -114,7 +118,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { } /// Returns true if an object has to be treated as precise -static bool IsPrecise(Operation operand) { +bool IsPrecise(Operation operand) { const auto& meta = operand.GetMeta(); if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { @@ -126,7 +130,7 @@ static bool IsPrecise(Operation operand) { return false; } -static bool IsPrecise(Node node) { +bool IsPrecise(Node node) { if (const auto operation = std::get_if<OperationNode>(node)) { return IsPrecise(*operation); } @@ -202,8 +206,10 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } - for (const auto& gmem : ir.GetGlobalMemoryBases()) { - entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); + for (const auto& gmem_pair : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem_pair; + entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, + usage.is_read, usage.is_written); } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -374,12 +380,22 @@ private: } void DeclareGlobalMemory() { - for (const auto& entry : ir.GetGlobalMemoryBases()) { + for (const auto& gmem : ir.GetGlobalMemory()) { + const auto& [base, usage] = gmem; + + // Since we don't know how the shader will use the shader, hint the driver to disable as + // much optimizations as possible + std::string qualifier = "coherent volatile"; + if (usage.is_read && !usage.is_written) + qualifier += " readonly"; + else if (usage.is_written && !usage.is_read) + qualifier += " writeonly"; + const std::string binding = - fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset); - code.AddLine("layout (std430, binding = " + binding + ") buffer " + - GetGlobalMemoryBlock(entry) + " {"); - code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); + fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset); + code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " + + GetGlobalMemoryBlock(base) + " {"); + code.AddLine(" float " + GetGlobalMemory(base) + "[];"); code.AddLine("};"); code.AddNewLine(); } @@ -426,9 +442,14 @@ private: std::string Visit(Node node) { if (const auto operation = std::get_if<OperationNode>(node)) { const auto operation_index = static_cast<std::size_t>(operation->GetCode()); + if (operation_index >= operation_decompilers.size()) { + UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); + return {}; + } const auto decompiler = operation_decompilers[operation_index]; if (decompiler == nullptr) { - UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); + UNREACHABLE_MSG("Undefined operation: {}", operation_index); + return {}; } return (this->*decompiler)(*operation); @@ -540,9 +561,8 @@ private: } else if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access - const std::string final_offset = code.GenerateTemporal(); - code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " + - std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';'); + const std::string final_offset = code.GenerateTemporary(); + code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4);"); return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), final_offset, final_offset); @@ -587,9 +607,9 @@ private: // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; - const std::string temporal = code.GenerateTemporal(); - code.AddLine(precise + "float " + temporal + " = " + value + ';'); - return temporal; + const std::string temporary = code.GenerateTemporary(); + code.AddLine(precise + "float " + temporary + " = " + value + ';'); + return temporary; } std::string VisitOperand(Operation operation, std::size_t operand_index) { @@ -601,9 +621,9 @@ private: return Visit(operand); } - const std::string temporal = code.GenerateTemporal(); - code.AddLine("float " + temporal + " = " + Visit(operand) + ';'); - return temporal; + const std::string temporary = code.GenerateTemporary(); + code.AddLine("float " + temporary + " = " + Visit(operand) + ';'); + return temporary; } std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { @@ -718,8 +738,8 @@ private: result_type)); } - std::string GenerateTexture(Operation operation, const std::string& func, - const std::vector<std::pair<Type, Node>>& extras) { + std::string GenerateTexture(Operation operation, const std::string& function_suffix, + const std::vector<TextureIR>& extras) { constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); @@ -729,11 +749,11 @@ private: const bool has_array = meta->sampler.IsArray(); const bool has_shadow = meta->sampler.IsShadow(); - std::string expr = func; - expr += '('; - expr += GetSampler(meta->sampler); - expr += ", "; - + std::string expr = "texture" + function_suffix; + if (!meta->aoffi.empty()) { + expr += "Offset"; + } + expr += '(' + GetSampler(meta->sampler) + ", "; expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); expr += '('; for (std::size_t i = 0; i < count; ++i) { @@ -751,36 +771,74 @@ private: } expr += ')'; - for (const auto& extra_pair : extras) { - const auto [type, operand] = extra_pair; - if (operand == nullptr) { - continue; + for (const auto& variant : extras) { + if (const auto argument = std::get_if<TextureArgument>(&variant)) { + expr += GenerateTextureArgument(*argument); + } else if (std::get_if<TextureAoffi>(&variant)) { + expr += GenerateTextureAoffi(meta->aoffi); + } else { + UNREACHABLE(); } - expr += ", "; + } - switch (type) { - case Type::Int: - if (const auto immediate = std::get_if<ImmediateNode>(operand)) { - // Inline the string as an immediate integer in GLSL (some extra arguments are - // required to be constant) - expr += std::to_string(static_cast<s32>(immediate->GetValue())); - } else { - expr += "ftoi(" + Visit(operand) + ')'; - } - break; - case Type::Float: - expr += Visit(operand); - break; - default: { - const auto type_int = static_cast<u32>(type); - UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); - expr += '0'; - break; + return expr + ')'; + } + + std::string GenerateTextureArgument(TextureArgument argument) { + const auto [type, operand] = argument; + if (operand == nullptr) { + return {}; + } + + std::string expr = ", "; + switch (type) { + case Type::Int: + if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + // Inline the string as an immediate integer in GLSL (some extra arguments are + // required to be constant) + expr += std::to_string(static_cast<s32>(immediate->GetValue())); + } else { + expr += "ftoi(" + Visit(operand) + ')'; + } + break; + case Type::Float: + expr += Visit(operand); + break; + default: { + const auto type_int = static_cast<u32>(type); + UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); + expr += '0'; + break; + } + } + return expr; + } + + std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { + if (aoffi.empty()) { + return {}; + } + constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"}; + std::string expr = ", "; + expr += coord_constructors.at(aoffi.size() - 1); + expr += '('; + + for (std::size_t index = 0; index < aoffi.size(); ++index) { + const auto operand{aoffi.at(index)}; + if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + // Inline the string as an immediate integer in GLSL (AOFFI arguments are required + // to be constant by the standard). + expr += std::to_string(static_cast<s32>(immediate->GetValue())); + } else { + expr += "ftoi(" + Visit(operand) + ')'; } + if (index + 1 < aoffi.size()) { + expr += ", "; } } + expr += ')'; - return expr + ')'; + return expr; } std::string Assign(Operation operation) { @@ -820,6 +878,12 @@ private: } else if (const auto lmem = std::get_if<LmemNode>(dest)) { target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]"; + } else if (const auto gmem = std::get_if<GmemNode>(dest)) { + const std::string real = Visit(gmem->GetRealAddress()); + const std::string base = Visit(gmem->GetBaseAddress()); + const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; + target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); + } else { UNREACHABLE_MSG("Assign called without a proper target"); } @@ -1159,7 +1223,8 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); + std::string expr = GenerateTexture( + operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1170,7 +1235,8 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); + std::string expr = GenerateTexture( + operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1182,7 +1248,8 @@ private: ASSERT(meta); const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; - return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + + return GenerateTexture(operation, "Gather", + {TextureArgument{type, meta->component}, TextureAoffi{}}) + GetSwizzle(meta->element); } @@ -1196,11 +1263,12 @@ private: switch (meta->element) { case 0: case 1: - return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta->element); + return "itof(int(textureSize(" + sampler + ", " + lod + ')' + + GetSwizzle(meta->element) + "))"; case 2: return "0"; case 3: - return "textureQueryLevels(" + sampler + ')'; + return "itof(textureQueryLevels(" + sampler + "))"; } UNREACHABLE(); return "0"; @@ -1211,8 +1279,8 @@ private: ASSERT(meta); if (meta->element < 2) { - return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + - " * vec2(256))" + GetSwizzle(meta->element) + "))"; + return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" + + GetSwizzle(meta->element) + "))"; } return "0"; } @@ -1565,11 +1633,11 @@ private: ShaderWriter code; }; +} // Anonymous namespace + std::string GetCommonDeclarations() { const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); - const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + - "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" + "#define ftoi floatBitsToInt\n" "#define ftou floatBitsToUint\n" "#define itof intBitsToFloat\n" diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 72aca4938..55b3d4d7b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -5,7 +5,6 @@ #pragma once #include <array> -#include <set> #include <string> #include <utility> #include <vector> @@ -40,8 +39,9 @@ private: class GlobalMemoryEntry { public: - explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset) - : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {} + explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, bool is_written) + : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{ + is_written} {} u32 GetCbufIndex() const { return cbuf_index; @@ -51,9 +51,19 @@ public: return cbuf_offset; } + bool IsRead() const { + return is_read; + } + + bool IsWritten() const { + return is_written; + } + private: u32 cbuf_index{}; u32 cbuf_offset{}; + bool is_read{}; + bool is_written{}; }; struct ShaderEntries { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 82fc4d44b..d5890a375 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -4,7 +4,6 @@ #include <cstring> #include <fmt/format.h> -#include <lz4.h> #include "common/assert.h" #include "common/common_paths.h" @@ -12,6 +11,7 @@ #include "common/file_util.h" #include "common/logging/log.h" #include "common/scm_rev.h" +#include "common/zstd_compression.h" #include "core/core.h" #include "core/hle/kernel/process.h" @@ -49,39 +49,6 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { return hash; } -template <typename T> -std::vector<u8> CompressData(const T* source, std::size_t source_size) { - if (source_size > LZ4_MAX_INPUT_SIZE) { - // Source size exceeds LZ4 maximum input size - return {}; - } - const auto source_size_int = static_cast<int>(source_size); - const int max_compressed_size = LZ4_compressBound(source_size_int); - std::vector<u8> compressed(max_compressed_size); - const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source), - reinterpret_cast<char*>(compressed.data()), - source_size_int, max_compressed_size); - if (compressed_size <= 0) { - // Compression failed - return {}; - } - compressed.resize(compressed_size); - return compressed; -} - -std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) { - std::vector<u8> uncompressed(uncompressed_size); - const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()), - reinterpret_cast<char*>(uncompressed.data()), - static_cast<int>(compressed.size()), - static_cast<int>(uncompressed.size())); - if (static_cast<int>(uncompressed_size) != size_check) { - // Decompression failed - return {}; - } - return uncompressed; -} - } // namespace ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, @@ -292,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - dump.binary = DecompressData(compressed_binary, binary_length); + dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary); if (dump.binary.empty()) { return {}; } @@ -321,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn return {}; } - const std::vector<u8> code = DecompressData(compressed_code, code_size); + const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code); if (code.empty()) { return {}; } @@ -370,11 +337,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn for (u32 i = 0; i < global_memory_count; ++i) { u32 cbuf_index{}; u32 cbuf_offset{}; + u8 is_read{}; + u8 is_written{}; if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) || - file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) { + file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) || + file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) || + file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) { return {}; } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset); + entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0, + is_written != 0); } for (auto& clip_distance : entry.entries.clip_distances) { @@ -430,7 +402,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu return false; for (const auto& gmem : entries.global_memory_entries) { if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 || - file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) { + file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 || + file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 || + file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) { return false; } } @@ -507,7 +481,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str if (!IsUsable()) return; - const std::vector<u8> compressed_code{CompressData(code.data(), code.size())}; + const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault( + reinterpret_cast<const u8*>(code.data()), code.size())}; if (compressed_code.empty()) { LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}", unique_identifier); @@ -537,7 +512,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p std::vector<u8> binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size()); + const std::vector<u8> compressed_binary = + Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size()); + if (compressed_binary.empty()) { LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}", usage.unique_identifier); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 7d96649af..8763d9c71 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include <fmt/format.h> -#include "common/assert.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_gen.h" diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index fba8e681b..fad346b48 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -4,12 +4,9 @@ #pragma once -#include <array> -#include <string> #include <vector> #include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 6a30c28d2..05ab01dcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -2,23 +2,55 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "core/core.h" +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_manager.h" namespace OpenGL::GLShader { -void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { - const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); - const auto& regs = gpu.regs; - const auto& state = gpu.state; +using Tegra::Engines::Maxwell3D; + +ProgramManager::ProgramManager() { + pipeline.Create(); +} + +ProgramManager::~ProgramManager() = default; + +void ProgramManager::ApplyTo(OpenGLState& state) { + UpdatePipeline(); + state.draw.shader_program = 0; + state.draw.program_pipeline = pipeline.handle; +} + +void ProgramManager::UpdatePipeline() { + // Avoid updating the pipeline when values have no changed + if (old_state == current_state) { + return; + } + + // Workaround for AMD bug + constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | + GL_FRAGMENT_SHADER_BIT}; + glUseProgramStages(pipeline.handle, all_used_stages, 0); + + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); + glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); + + old_state = current_state; +} + +void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) { + const auto& regs = maxwell.regs; + const auto& state = maxwell.state; // TODO(bunnei): Support more than one viewport viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f; - u32 func = static_cast<u32>(regs.alpha_test_func); + auto func{static_cast<u32>(regs.alpha_test_func)}; // Normalize the gl variants of opCompare to be the same as the normal variants - u32 op_gl_variant_base = static_cast<u32>(Tegra::Engines::Maxwell3D::Regs::ComparisonOp::Never); + const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never); if (func >= op_gl_variant_base) { func = func - op_gl_variant_base + 1U; } @@ -31,8 +63,9 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh // Assign in which stage the position has to be flipped // (the last stage before the fragment shader). - if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) { - flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); + constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); + if (maxwell.regs.shader_config[geometry_index].enable) { + flip_stage = geometry_index; } else { flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 4970aafed..cec18a832 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -4,6 +4,8 @@ #pragma once +#include <cstddef> + #include <glad/glad.h> #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -12,14 +14,13 @@ namespace OpenGL::GLShader { -using Tegra::Engines::Maxwell3D; - /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -// Not following that rule will cause problems on some AMD drivers. +/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at +/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. +/// Not following that rule will cause problems on some AMD drivers. struct MaxwellUniformData { - void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); + void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage); + alignas(16) GLvec4 viewport_flip; struct alignas(16) { GLuint instance_id; @@ -39,56 +40,48 @@ static_assert(sizeof(MaxwellUniformData) < 16384, class ProgramManager { public: - ProgramManager() { - pipeline.Create(); - } + explicit ProgramManager(); + ~ProgramManager(); + + void ApplyTo(OpenGLState& state); void UseProgrammableVertexShader(GLuint program) { - vs = program; + current_state.vertex_shader = program; } void UseProgrammableGeometryShader(GLuint program) { - gs = program; + current_state.geometry_shader = program; } void UseProgrammableFragmentShader(GLuint program) { - fs = program; + current_state.fragment_shader = program; } void UseTrivialGeometryShader() { - gs = 0; - } - - void ApplyTo(OpenGLState& state) { - UpdatePipeline(); - state.draw.shader_program = 0; - state.draw.program_pipeline = pipeline.handle; - state.geometry_shaders.enabled = (gs != 0); + current_state.geometry_shader = 0; } private: - void UpdatePipeline() { - // Avoid updating the pipeline when values have no changed - if (old_vs == vs && old_fs == fs && old_gs == gs) - return; - // Workaround for AMD bug - glUseProgramStages(pipeline.handle, - GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, - 0); - - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs); - glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs); - - // Update the old values - old_vs = vs; - old_fs = fs; - old_gs = gs; - } + struct PipelineState { + bool operator==(const PipelineState& rhs) const { + return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && + geometry_shader == rhs.geometry_shader; + } + + bool operator!=(const PipelineState& rhs) const { + return !operator==(rhs); + } + + GLuint vertex_shader{}; + GLuint fragment_shader{}; + GLuint geometry_shader{}; + }; + + void UpdatePipeline(); OGLPipeline pipeline; - GLuint vs{}, fs{}, gs{}; - GLuint old_vs{}, old_fs{}, old_gs{}; + PipelineState current_state; + PipelineState old_state; }; } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 9419326a3..52d569a1b 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -10,16 +10,62 @@ namespace OpenGL { -OpenGLState OpenGLState::cur_state; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; +OpenGLState OpenGLState::cur_state; bool OpenGLState::s_rgb_used; +namespace { + +template <typename T> +bool UpdateValue(T& current_value, const T new_value) { + const bool changed = current_value != new_value; + current_value = new_value; + return changed; +} + +template <typename T1, typename T2> +bool UpdateTie(T1 current_value, const T2 new_value) { + const bool changed = current_value != new_value; + current_value = new_value; + return changed; +} + +void Enable(GLenum cap, bool enable) { + if (enable) { + glEnable(cap); + } else { + glDisable(cap); + } +} + +void Enable(GLenum cap, GLuint index, bool enable) { + if (enable) { + glEnablei(cap, index); + } else { + glDisablei(cap, index); + } +} + +void Enable(GLenum cap, bool& current_value, bool new_value) { + if (UpdateValue(current_value, new_value)) + Enable(cap, new_value); +} + +void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) { + if (UpdateValue(current_value, new_value)) + Enable(cap, index, new_value); +} + +} // namespace + OpenGLState::OpenGLState() { // These all match default OpenGL values - geometry_shaders.enabled = false; framebuffer_srgb.enabled = false; + multisample_control.alpha_to_coverage = false; multisample_control.alpha_to_one = false; + cull.enabled = false; cull.mode = GL_BACK; cull.front_face = GL_CCW; @@ -30,14 +76,15 @@ OpenGLState::OpenGLState() { primitive_restart.enabled = false; primitive_restart.index = 0; + for (auto& item : color_mask) { item.red_enabled = GL_TRUE; item.green_enabled = GL_TRUE; item.blue_enabled = GL_TRUE; item.alpha_enabled = GL_TRUE; } - stencil.test_enabled = false; - auto reset_stencil = [](auto& config) { + + const auto ResetStencil = [](auto& config) { config.test_func = GL_ALWAYS; config.test_ref = 0; config.test_mask = 0xFFFFFFFF; @@ -46,8 +93,10 @@ OpenGLState::OpenGLState() { config.action_depth_pass = GL_KEEP; config.action_stencil_fail = GL_KEEP; }; - reset_stencil(stencil.front); - reset_stencil(stencil.back); + stencil.test_enabled = false; + ResetStencil(stencil.front); + ResetStencil(stencil.back); + for (auto& item : viewports) { item.x = 0; item.y = 0; @@ -61,6 +110,7 @@ OpenGLState::OpenGLState() { item.scissor.width = 0; item.scissor.height = 0; } + for (auto& item : blend) { item.enabled = true; item.rgb_equation = GL_FUNC_ADD; @@ -70,11 +120,14 @@ OpenGLState::OpenGLState() { item.src_a_func = GL_ONE; item.dst_a_func = GL_ZERO; } + independant_blend.enabled = false; + blend_color.red = 0.0f; blend_color.green = 0.0f; blend_color.blue = 0.0f; blend_color.alpha = 0.0f; + logic_op.enabled = false; logic_op.operation = GL_COPY; @@ -91,9 +144,12 @@ OpenGLState::OpenGLState() { clip_distance = {}; point.size = 1; + fragment_color_clamp.enabled = false; + depth_clamp.far_plane = false; depth_clamp.near_plane = false; + polygon_offset.fill_enable = false; polygon_offset.line_enable = false; polygon_offset.point_enable = false; @@ -103,260 +159,255 @@ OpenGLState::OpenGLState() { } void OpenGLState::ApplyDefaultState() { + glEnable(GL_BLEND); glDisable(GL_FRAMEBUFFER_SRGB); glDisable(GL_CULL_FACE); glDisable(GL_DEPTH_TEST); glDisable(GL_PRIMITIVE_RESTART); glDisable(GL_STENCIL_TEST); - glEnable(GL_BLEND); glDisable(GL_COLOR_LOGIC_OP); glDisable(GL_SCISSOR_TEST); } +void OpenGLState::ApplyFramebufferState() const { + if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) { + glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); + } + if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); + } +} + +void OpenGLState::ApplyVertexArrayState() const { + if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) { + glBindVertexArray(draw.vertex_array); + } +} + +void OpenGLState::ApplyShaderProgram() const { + if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) { + glUseProgram(draw.shader_program); + } +} + +void OpenGLState::ApplyProgramPipeline() const { + if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) { + glBindProgramPipeline(draw.program_pipeline); + } +} + +void OpenGLState::ApplyClipDistances() const { + for (std::size_t i = 0; i < clip_distance.size(); ++i) { + Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i], + clip_distance[i]); + } +} + +void OpenGLState::ApplyPointSize() const { + if (UpdateValue(cur_state.point.size, point.size)) { + glPointSize(point.size); + } +} + +void OpenGLState::ApplyFragmentColorClamp() const { + if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) { + glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, + fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); + } +} + +void OpenGLState::ApplyMultisample() const { + Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage, + multisample_control.alpha_to_coverage); + Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one, + multisample_control.alpha_to_one); +} + +void OpenGLState::ApplyDepthClamp() const { + if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && + depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { + return; + } + cur_state.depth_clamp = depth_clamp; + + UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, + "Unimplemented Depth Clamp Separation!"); + + Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane); +} + void OpenGLState::ApplySRgb() const { - if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { - if (framebuffer_srgb.enabled) { - // Track if sRGB is used - s_rgb_used = true; - glEnable(GL_FRAMEBUFFER_SRGB); - } else { - glDisable(GL_FRAMEBUFFER_SRGB); - } + if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled) + return; + cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled; + if (framebuffer_srgb.enabled) { + // Track if sRGB is used + s_rgb_used = true; + glEnable(GL_FRAMEBUFFER_SRGB); + } else { + glDisable(GL_FRAMEBUFFER_SRGB); } } void OpenGLState::ApplyCulling() const { - if (cull.enabled != cur_state.cull.enabled) { - if (cull.enabled) { - glEnable(GL_CULL_FACE); - } else { - glDisable(GL_CULL_FACE); - } - } + Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled); - if (cull.mode != cur_state.cull.mode) { + if (UpdateValue(cur_state.cull.mode, cull.mode)) { glCullFace(cull.mode); } - if (cull.front_face != cur_state.cull.front_face) { + if (UpdateValue(cur_state.cull.front_face, cull.front_face)) { glFrontFace(cull.front_face); } } void OpenGLState::ApplyColorMask() const { - if (independant_blend.enabled) { - for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - const auto& updated = color_mask[i]; - const auto& current = cur_state.color_mask[i]; - if (updated.red_enabled != current.red_enabled || - updated.green_enabled != current.green_enabled || - updated.blue_enabled != current.blue_enabled || - updated.alpha_enabled != current.alpha_enabled) { - glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, - updated.blue_enabled, updated.alpha_enabled); - } - } - } else { - const auto& updated = color_mask[0]; - const auto& current = cur_state.color_mask[0]; + for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { + const auto& updated = color_mask[i]; + auto& current = cur_state.color_mask[i]; if (updated.red_enabled != current.red_enabled || updated.green_enabled != current.green_enabled || updated.blue_enabled != current.blue_enabled || updated.alpha_enabled != current.alpha_enabled) { - glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled, - updated.alpha_enabled); + current = updated; + glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, + updated.blue_enabled, updated.alpha_enabled); } } } void OpenGLState::ApplyDepth() const { - if (depth.test_enabled != cur_state.depth.test_enabled) { - if (depth.test_enabled) { - glEnable(GL_DEPTH_TEST); - } else { - glDisable(GL_DEPTH_TEST); - } - } + Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled); - if (depth.test_func != cur_state.depth.test_func) { + if (cur_state.depth.test_func != depth.test_func) { + cur_state.depth.test_func = depth.test_func; glDepthFunc(depth.test_func); } - if (depth.write_mask != cur_state.depth.write_mask) { + if (cur_state.depth.write_mask != depth.write_mask) { + cur_state.depth.write_mask = depth.write_mask; glDepthMask(depth.write_mask); } } void OpenGLState::ApplyPrimitiveRestart() const { - if (primitive_restart.enabled != cur_state.primitive_restart.enabled) { - if (primitive_restart.enabled) { - glEnable(GL_PRIMITIVE_RESTART); - } else { - glDisable(GL_PRIMITIVE_RESTART); - } - } + Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled); - if (primitive_restart.index != cur_state.primitive_restart.index) { + if (cur_state.primitive_restart.index != primitive_restart.index) { + cur_state.primitive_restart.index = primitive_restart.index; glPrimitiveRestartIndex(primitive_restart.index); } } void OpenGLState::ApplyStencilTest() const { - if (stencil.test_enabled != cur_state.stencil.test_enabled) { - if (stencil.test_enabled) { - glEnable(GL_STENCIL_TEST); - } else { - glDisable(GL_STENCIL_TEST); - } - } - - const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) { - if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref || - config.test_mask != prev_config.test_mask) { + Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled); + + const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) { + if (current.test_func != config.test_func || current.test_ref != config.test_ref || + current.test_mask != config.test_mask) { + current.test_func = config.test_func; + current.test_ref = config.test_ref; + current.test_mask = config.test_mask; glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); } - if (config.action_depth_fail != prev_config.action_depth_fail || - config.action_depth_pass != prev_config.action_depth_pass || - config.action_stencil_fail != prev_config.action_stencil_fail) { + if (current.action_depth_fail != config.action_depth_fail || + current.action_depth_pass != config.action_depth_pass || + current.action_stencil_fail != config.action_stencil_fail) { + current.action_depth_fail = config.action_depth_fail; + current.action_depth_pass = config.action_depth_pass; + current.action_stencil_fail = config.action_stencil_fail; glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, config.action_depth_pass); } - if (config.write_mask != prev_config.write_mask) { + if (current.write_mask != config.write_mask) { + current.write_mask = config.write_mask; glStencilMaskSeparate(face, config.write_mask); } }; ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front); ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); } -// Viewport does not affects glClearBuffer so emulate viewport using scissor test -void OpenGLState::EmulateViewportWithScissor() { - auto& current = viewports[0]; - if (current.scissor.enabled) { - const GLint left = std::max(current.x, current.scissor.x); - const GLint right = - std::max(current.x + current.width, current.scissor.x + current.scissor.width); - const GLint bottom = std::max(current.y, current.scissor.y); - const GLint top = - std::max(current.y + current.height, current.scissor.y + current.scissor.height); - current.scissor.x = std::max(left, 0); - current.scissor.y = std::max(bottom, 0); - current.scissor.width = std::max(right - left, 0); - current.scissor.height = std::max(top - bottom, 0); - } else { - current.scissor.enabled = true; - current.scissor.x = current.x; - current.scissor.y = current.y; - current.scissor.width = current.width; - current.scissor.height = current.height; - } -} void OpenGLState::ApplyViewport() const { - if (geometry_shaders.enabled) { - for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports); - i++) { - const auto& current = cur_state.viewports[i]; - const auto& updated = viewports[i]; - if (updated.x != current.x || updated.y != current.y || - updated.width != current.width || updated.height != current.height) { - glViewportIndexedf( - i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), - static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height)); - } - if (updated.depth_range_near != current.depth_range_near || - updated.depth_range_far != current.depth_range_far) { - glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); - } - - if (updated.scissor.enabled != current.scissor.enabled) { - if (updated.scissor.enabled) { - glEnablei(GL_SCISSOR_TEST, i); - } else { - glDisablei(GL_SCISSOR_TEST, i); - } - } - - if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || - updated.scissor.width != current.scissor.width || - updated.scissor.height != current.scissor.height) { - glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, - updated.scissor.height); - } - } - } else { - const auto& current = cur_state.viewports[0]; - const auto& updated = viewports[0]; - if (updated.x != current.x || updated.y != current.y || updated.width != current.width || - updated.height != current.height) { - glViewport(updated.x, updated.y, updated.width, updated.height); - } - - if (updated.depth_range_near != current.depth_range_near || - updated.depth_range_far != current.depth_range_far) { - glDepthRange(updated.depth_range_near, updated.depth_range_far); + for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) { + const auto& updated = viewports[i]; + auto& current = cur_state.viewports[i]; + + if (current.x != updated.x || current.y != updated.y || current.width != updated.width || + current.height != updated.height) { + current.x = updated.x; + current.y = updated.y; + current.width = updated.width; + current.height = updated.height; + glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), + static_cast<GLfloat>(updated.width), + static_cast<GLfloat>(updated.height)); } - - if (updated.scissor.enabled != current.scissor.enabled) { - if (updated.scissor.enabled) { - glEnable(GL_SCISSOR_TEST); - } else { - glDisable(GL_SCISSOR_TEST); - } + if (current.depth_range_near != updated.depth_range_near || + current.depth_range_far != updated.depth_range_far) { + current.depth_range_near = updated.depth_range_near; + current.depth_range_far = updated.depth_range_far; + glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); } - if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || - updated.scissor.width != current.scissor.width || - updated.scissor.height != current.scissor.height) { - glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width, - updated.scissor.height); + Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled); + + if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y || + current.scissor.width != updated.scissor.width || + current.scissor.height != updated.scissor.height) { + current.scissor.x = updated.scissor.x; + current.scissor.y = updated.scissor.y; + current.scissor.width = updated.scissor.width; + current.scissor.height = updated.scissor.height; + glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, + updated.scissor.height); } } } void OpenGLState::ApplyGlobalBlending() const { - const Blend& current = cur_state.blend[0]; const Blend& updated = blend[0]; - if (updated.enabled != current.enabled) { - if (updated.enabled) { - glEnable(GL_BLEND); - } else { - glDisable(GL_BLEND); - } - } - if (!updated.enabled) { - return; - } - if (updated.src_rgb_func != current.src_rgb_func || - updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || - updated.dst_a_func != current.dst_a_func) { + Blend& current = cur_state.blend[0]; + + Enable(GL_BLEND, current.enabled, updated.enabled); + + if (current.src_rgb_func != updated.src_rgb_func || + current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func || + current.dst_a_func != updated.dst_a_func) { + current.src_rgb_func = updated.src_rgb_func; + current.dst_rgb_func = updated.dst_rgb_func; + current.src_a_func = updated.src_a_func; + current.dst_a_func = updated.dst_a_func; glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); } - if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { + if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) { + current.rgb_equation = updated.rgb_equation; + current.a_equation = updated.a_equation; glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); } } void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { const Blend& updated = blend[target]; - const Blend& current = cur_state.blend[target]; - if (updated.enabled != current.enabled || force) { - if (updated.enabled) { - glEnablei(GL_BLEND, static_cast<GLuint>(target)); - } else { - glDisablei(GL_BLEND, static_cast<GLuint>(target)); - } + Blend& current = cur_state.blend[target]; + + if (current.enabled != updated.enabled || force) { + current.enabled = updated.enabled; + Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled); } - if (updated.src_rgb_func != current.src_rgb_func || - updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || - updated.dst_a_func != current.dst_a_func) { + if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func, + current.dst_a_func), + std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, + updated.dst_a_func))) { glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); } - if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { + if (UpdateTie(std::tie(current.rgb_equation, current.a_equation), + std::tie(updated.rgb_equation, updated.a_equation))) { glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, updated.a_equation); } @@ -364,77 +415,48 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { void OpenGLState::ApplyBlending() const { if (independant_blend.enabled) { - for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - ApplyTargetBlending(i, - independant_blend.enabled != cur_state.independant_blend.enabled); + const bool force = independant_blend.enabled != cur_state.independant_blend.enabled; + for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) { + ApplyTargetBlending(target, force); } } else { ApplyGlobalBlending(); } - if (blend_color.red != cur_state.blend_color.red || - blend_color.green != cur_state.blend_color.green || - blend_color.blue != cur_state.blend_color.blue || - blend_color.alpha != cur_state.blend_color.alpha) { + cur_state.independant_blend.enabled = independant_blend.enabled; + + if (UpdateTie( + std::tie(cur_state.blend_color.red, cur_state.blend_color.green, + cur_state.blend_color.blue, cur_state.blend_color.alpha), + std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) { glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha); } } void OpenGLState::ApplyLogicOp() const { - if (logic_op.enabled != cur_state.logic_op.enabled) { - if (logic_op.enabled) { - glEnable(GL_COLOR_LOGIC_OP); - } else { - glDisable(GL_COLOR_LOGIC_OP); - } - } + Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled); - if (logic_op.operation != cur_state.logic_op.operation) { + if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) { glLogicOp(logic_op.operation); } } void OpenGLState::ApplyPolygonOffset() const { - const bool fill_enable_changed = - polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; - const bool line_enable_changed = - polygon_offset.line_enable != cur_state.polygon_offset.line_enable; - const bool point_enable_changed = - polygon_offset.point_enable != cur_state.polygon_offset.point_enable; - const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor; - const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units; - const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp; - - if (fill_enable_changed) { - if (polygon_offset.fill_enable) { - glEnable(GL_POLYGON_OFFSET_FILL); - } else { - glDisable(GL_POLYGON_OFFSET_FILL); - } - } - - if (line_enable_changed) { - if (polygon_offset.line_enable) { - glEnable(GL_POLYGON_OFFSET_LINE); - } else { - glDisable(GL_POLYGON_OFFSET_LINE); - } - } - - if (point_enable_changed) { - if (polygon_offset.point_enable) { - glEnable(GL_POLYGON_OFFSET_POINT); - } else { - glDisable(GL_POLYGON_OFFSET_POINT); - } - } - - if (factor_changed || units_changed || clamp_changed) { + Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable, + polygon_offset.fill_enable); + Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable, + polygon_offset.line_enable); + Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable, + polygon_offset.point_enable); + + if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units, + cur_state.polygon_offset.clamp), + std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) { if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); } else { - glPolygonOffset(polygon_offset.factor, polygon_offset.units); UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, "Unimplemented Depth polygon offset clamp."); + glPolygonOffset(polygon_offset.factor, polygon_offset.units); } } } @@ -443,22 +465,21 @@ void OpenGLState::ApplyTextures() const { bool has_delta{}; std::size_t first{}; std::size_t last{}; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures; + std::array<GLuint, Maxwell::NumTextureSamplers> textures; for (std::size_t i = 0; i < std::size(texture_units); ++i) { const auto& texture_unit = texture_units[i]; - const auto& cur_state_texture_unit = cur_state.texture_units[i]; + auto& cur_state_texture_unit = cur_state.texture_units[i]; textures[i] = texture_unit.texture; - - if (textures[i] != cur_state_texture_unit.texture) { - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; + if (cur_state_texture_unit.texture == textures[i]) + continue; + cur_state_texture_unit.texture = textures[i]; + if (!has_delta) { + first = i; + has_delta = true; } + last = i; } - if (has_delta) { glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), textures.data() + first); @@ -469,16 +490,18 @@ void OpenGLState::ApplySamplers() const { bool has_delta{}; std::size_t first{}; std::size_t last{}; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers; + std::array<GLuint, Maxwell::NumTextureSamplers> samplers; + for (std::size_t i = 0; i < std::size(samplers); ++i) { + if (cur_state.texture_units[i].sampler == texture_units[i].sampler) + continue; + cur_state.texture_units[i].sampler = texture_units[i].sampler; samplers[i] = texture_units[i].sampler; - if (samplers[i] != cur_state.texture_units[i].sampler) { - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; + if (!has_delta) { + first = i; + has_delta = true; } + last = i; } if (has_delta) { glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), @@ -486,81 +509,15 @@ void OpenGLState::ApplySamplers() const { } } -void OpenGLState::ApplyFramebufferState() const { - if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); - } - if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) { - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); - } -} - -void OpenGLState::ApplyVertexArrayState() const { - if (draw.vertex_array != cur_state.draw.vertex_array) { - glBindVertexArray(draw.vertex_array); - } -} - -void OpenGLState::ApplyDepthClamp() const { - if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && - depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { - return; - } - UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, - "Unimplemented Depth Clamp Separation!"); - - if (depth_clamp.far_plane || depth_clamp.near_plane) { - glEnable(GL_DEPTH_CLAMP); - } else { - glDisable(GL_DEPTH_CLAMP); - } -} - void OpenGLState::Apply() const { ApplyFramebufferState(); ApplyVertexArrayState(); - - // Shader program - if (draw.shader_program != cur_state.draw.shader_program) { - glUseProgram(draw.shader_program); - } - - // Program pipeline - if (draw.program_pipeline != cur_state.draw.program_pipeline) { - glBindProgramPipeline(draw.program_pipeline); - } - // Clip distance - for (std::size_t i = 0; i < clip_distance.size(); ++i) { - if (clip_distance[i] != cur_state.clip_distance[i]) { - if (clip_distance[i]) { - glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); - } else { - glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); - } - } - } - // Point - if (point.size != cur_state.point.size) { - glPointSize(point.size); - } - if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) { - glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, - fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); - } - if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) { - if (multisample_control.alpha_to_coverage) { - glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE); - } else { - glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE); - } - } - if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) { - if (multisample_control.alpha_to_one) { - glEnable(GL_SAMPLE_ALPHA_TO_ONE); - } else { - glDisable(GL_SAMPLE_ALPHA_TO_ONE); - } - } + ApplyShaderProgram(); + ApplyProgramPipeline(); + ApplyClipDistances(); + ApplyPointSize(); + ApplyFragmentColorClamp(); + ApplyMultisample(); ApplyDepthClamp(); ApplyColorMask(); ApplyViewport(); @@ -574,7 +531,28 @@ void OpenGLState::Apply() const { ApplyTextures(); ApplySamplers(); ApplyPolygonOffset(); - cur_state = *this; +} + +void OpenGLState::EmulateViewportWithScissor() { + auto& current = viewports[0]; + if (current.scissor.enabled) { + const GLint left = std::max(current.x, current.scissor.x); + const GLint right = + std::max(current.x + current.width, current.scissor.x + current.scissor.width); + const GLint bottom = std::max(current.y, current.scissor.y); + const GLint top = + std::max(current.y + current.height, current.scissor.y + current.scissor.height); + current.scissor.x = std::max(left, 0); + current.scissor.y = std::max(bottom, 0); + current.scissor.width = std::max(right - left, 0); + current.scissor.height = std::max(top - bottom, 0); + } else { + current.scissor.enabled = true; + current.scissor.x = current.x; + current.scissor.y = current.y; + current.scissor.width = current.width; + current.scissor.height = current.height; + } } OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 9e1eda5b1..41418a7b8 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -54,10 +54,6 @@ public: } depth_clamp; // GL_DEPTH_CLAMP struct { - bool enabled; // viewports arrays are only supported when geometry shaders are enabled. - } geometry_shaders; - - struct { bool enabled; // GL_CULL_FACE GLenum mode; // GL_CULL_FACE_MODE GLenum front_face; // GL_FRONT_FACE @@ -184,34 +180,26 @@ public: static OpenGLState GetCurState() { return cur_state; } + static bool GetsRGBUsed() { return s_rgb_used; } + static void ClearsRGBUsed() { s_rgb_used = false; } + /// Apply this state as the current OpenGL state void Apply() const; - /// Apply only the state affecting the framebuffer + void ApplyFramebufferState() const; - /// Apply only the state affecting the vertex array void ApplyVertexArrayState() const; - /// Set the initial OpenGL state - static void ApplyDefaultState(); - /// Resets any references to the given resource - OpenGLState& UnbindTexture(GLuint handle); - OpenGLState& ResetSampler(GLuint handle); - OpenGLState& ResetProgram(GLuint handle); - OpenGLState& ResetPipeline(GLuint handle); - OpenGLState& ResetVertexArray(GLuint handle); - OpenGLState& ResetFramebuffer(GLuint handle); - void EmulateViewportWithScissor(); - -private: - static OpenGLState cur_state; - // Workaround for sRGB problems caused by - // QT not supporting srgb output - static bool s_rgb_used; + void ApplyShaderProgram() const; + void ApplyProgramPipeline() const; + void ApplyClipDistances() const; + void ApplyPointSize() const; + void ApplyFragmentColorClamp() const; + void ApplyMultisample() const; void ApplySRgb() const; void ApplyCulling() const; void ApplyColorMask() const; @@ -227,6 +215,26 @@ private: void ApplySamplers() const; void ApplyDepthClamp() const; void ApplyPolygonOffset() const; + + /// Set the initial OpenGL state + static void ApplyDefaultState(); + + /// Resets any references to the given resource + OpenGLState& UnbindTexture(GLuint handle); + OpenGLState& ResetSampler(GLuint handle); + OpenGLState& ResetProgram(GLuint handle); + OpenGLState& ResetPipeline(GLuint handle); + OpenGLState& ResetVertexArray(GLuint handle); + OpenGLState& ResetFramebuffer(GLuint handle); + + /// Viewport does not affects glClearBuffer so emulate viewport using scissor test + void EmulateViewportWithScissor(); + +private: + static OpenGLState cur_state; + + // Workaround for sRGB problems caused by QT not supporting srgb output + static bool s_rgb_used; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a01efeb05..d69cba9c3 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -5,7 +5,6 @@ #include <algorithm> #include <cstddef> #include <cstdlib> -#include <cstring> #include <memory> #include <glad/glad.h> #include "common/assert.h" diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index d84634cb3..84a987371 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -5,11 +5,39 @@ #include <string> #include <fmt/format.h> #include <glad/glad.h> +#include "common/assert.h" #include "common/common_types.h" #include "video_core/renderer_opengl/utils.h" namespace OpenGL { +BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} + +BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; + +void BindBuffersRangePushBuffer::Setup(GLuint first_) { + first = first_; + buffers.clear(); + offsets.clear(); + sizes.clear(); +} + +void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { + buffers.push_back(buffer); + offsets.push_back(offset); + sizes.push_back(size); +} + +void BindBuffersRangePushBuffer::Bind() const { + const std::size_t count{buffers.size()}; + DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); + if (count == 0) { + return; + } + glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), + sizes.data()); +} + void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) { if (!GLAD_GL_KHR_debug) { return; // We don't need to throw an error as this is just for debugging diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 1fcb6fc11..aef45c9dc 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -5,11 +5,31 @@ #pragma once #include <string> +#include <vector> #include <glad/glad.h> #include "common/common_types.h" namespace OpenGL { +class BindBuffersRangePushBuffer { +public: + BindBuffersRangePushBuffer(GLenum target); + ~BindBuffersRangePushBuffer(); + + void Setup(GLuint first_); + + void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); + + void Bind() const; + +private: + GLenum target; + GLuint first; + std::vector<GLuint> buffers; + std::vector<GLintptr> offsets; + std::vector<GLsizeiptr> sizes; +}; + void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = ""); } // namespace OpenGL
\ No newline at end of file |
