diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
22 files changed, 689 insertions, 666 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index b3062e5ba..7989ec11b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -7,30 +7,33 @@ #include "common/alignment.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" namespace OpenGL { +CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, + std::size_t alignment, u8* host_ptr) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, + alignment{alignment} {} + OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) : RasterizerCache{rasterizer}, stream_buffer(size, true) {} -GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, - std::size_t alignment, bool cache) { +GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment, + bool cache) { auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; - ASSERT_MSG(cpu_addr, "Invalid GPU address"); // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. cache &= size >= 2048; + const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; if (cache) { - auto entry = TryGet(*cpu_addr); + auto entry = TryGet(host_ptr); if (entry) { - if (entry->size >= size && entry->alignment == alignment) { - return entry->offset; + if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { + return entry->GetOffset(); } Unregister(entry); } @@ -39,17 +42,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size AlignBuffer(alignment); const GLintptr uploaded_offset = buffer_offset; - Memory::ReadBlock(*cpu_addr, buffer_ptr, size); + if (!host_ptr) { + return uploaded_offset; + } + std::memcpy(buffer_ptr, host_ptr, size); buffer_ptr += size; buffer_offset += size; if (cache) { - auto entry = std::make_shared<CachedBufferEntry>(); - entry->offset = uploaded_offset; - entry->size = size; - entry->alignment = alignment; - entry->addr = *cpu_addr; + auto entry = std::make_shared<CachedBufferEntry>( + *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr); Register(entry); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index c11acfb79..fc33aa433 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -17,22 +17,39 @@ namespace OpenGL { class RasterizerOpenGL; -struct CachedBufferEntry final : public RasterizerCacheObject { - VAddr GetAddr() const override { - return addr; +class CachedBufferEntry final : public RasterizerCacheObject { +public: + explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, + std::size_t alignment, u8* host_ptr); + + VAddr GetCpuAddr() const override { + return cpu_addr; } std::size_t GetSizeInBytes() const override { return size; } + std::size_t GetSize() const { + return size; + } + + GLintptr GetOffset() const { + return offset; + } + + std::size_t GetAlignment() const { + return alignment; + } + // We do not have to flush this cache as things in it are never modified by us. void Flush() override {} - VAddr addr; - std::size_t size; - GLintptr offset; - std::size_t alignment; +private: + VAddr cpu_addr{}; + std::size_t size{}; + GLintptr offset{}; + std::size_t alignment{}; }; class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { @@ -41,7 +58,7 @@ public: /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been /// allocated. - GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, + GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, bool cache = true); /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 7161d1dea..5842d6213 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -4,10 +4,8 @@ #include <glad/glad.h> -#include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/renderer_opengl/gl_global_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" @@ -15,12 +13,13 @@ namespace OpenGL { -CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} { +CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} { buffer.Create(); // Bind and unbind the buffer so it gets allocated by the driver glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); - LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); + LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); } void CachedGlobalRegion::Reload(u32 size_) { @@ -35,10 +34,10 @@ void CachedGlobalRegion::Reload(u32 size_) { // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); + glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); } -GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { +GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { const auto search{reserve.find(addr)}; if (search == reserve.end()) { return {}; @@ -46,11 +45,14 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 return search->second; } -GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { - GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; +GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size, + u8* host_ptr) { + GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; if (!region) { // No reserved surface available, create a new one and reserve it - region = std::make_shared<CachedGlobalRegion>(addr, size); + auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; + const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr); + region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr); ReserveGlobalRegion(region); } region->Reload(size); @@ -58,7 +60,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si } void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { - reserve.insert_or_assign(region->GetAddr(), std::move(region)); + reserve.insert_or_assign(region->GetCacheAddr(), std::move(region)); } GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) @@ -69,22 +71,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { auto& gpu{Core::System::GetInstance().GPU()}; - const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]; - const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress( - cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()); - ASSERT(cbuf_addr); - - const auto actual_addr_gpu = Memory::Read64(*cbuf_addr); - const auto size = Memory::Read32(*cbuf_addr + 8); - const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu); - ASSERT(actual_addr); + auto& memory_manager{gpu.MemoryManager()}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; + const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + + global_region.GetCbufOffset()}; + const auto actual_addr{memory_manager.Read<u64>(addr)}; + const auto size{memory_manager.Read<u32>(addr + 8)}; // Look up global region in the cache based on address - GlobalRegion region = TryGet(*actual_addr); + const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; + GlobalRegion region{TryGet(host_ptr)}; if (!region) { // No global region found - create a new one - region = GetUncachedGlobalRegion(*actual_addr, size); + region = GetUncachedGlobalRegion(actual_addr, size, host_ptr); Register(region); } diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index ba2bdc60c..5a21ab66f 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h @@ -27,14 +27,12 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; class CachedGlobalRegion final : public RasterizerCacheObject { public: - explicit CachedGlobalRegion(VAddr addr, u32 size); + explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr); - /// Gets the address of the shader in guest memory, required for cache management - VAddr GetAddr() const override { - return addr; + VAddr GetCpuAddr() const override { + return cpu_addr; } - /// Gets the size of the shader in guest memory, required for cache management std::size_t GetSizeInBytes() const override { return size; } @@ -53,9 +51,8 @@ public: } private: - VAddr addr{}; + VAddr cpu_addr{}; u32 size{}; - OGLBuffer buffer; }; @@ -68,11 +65,11 @@ public: Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); private: - GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; - GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); + GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; + GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr); void ReserveGlobalRegion(GlobalRegion region); - std::unordered_map<VAddr, GlobalRegion> reserve; + std::unordered_map<CacheAddr, GlobalRegion> reserve; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp index 77d5cedd2..c3e94d917 100644 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp @@ -7,7 +7,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "core/core.h" -#include "core/memory.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_primitive_assembler.h" @@ -40,16 +40,12 @@ GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) { return index_offset; } -GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, - u32 count) { +GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) { const std::size_t map_size{CalculateQuadSize(count)}; auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; - ASSERT_MSG(cpu_addr, "Invalid GPU address"); - - const u8* source{Memory::GetPointer(*cpu_addr)}; + const u8* source{memory_manager.GetPointer(gpu_addr)}; for (u32 primitive = 0; primitive < count / 4; ++primitive) { for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { @@ -64,4 +60,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size return index_offset; } -} // namespace OpenGL
\ No newline at end of file +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h index a8cb88eb5..4e87ce4d6 100644 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.h +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h @@ -4,11 +4,9 @@ #pragma once -#include <vector> #include <glad/glad.h> #include "common/common_types.h" -#include "video_core/memory_manager.h" namespace OpenGL { @@ -24,7 +22,7 @@ public: GLintptr MakeQuadArray(u32 first, u32 count); - GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count); + GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count); private: OGLBufferCache& buffer_cache; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 976f64c24..7ff1e6737 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -17,7 +17,6 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" @@ -26,7 +25,6 @@ #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" -#include "video_core/video_core.h" namespace OpenGL { @@ -100,11 +98,9 @@ struct FramebufferCacheKey { } }; -RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, - ScreenInfo& info) - : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, - emu_window{window}, system{system}, screen_info{info}, - buffer_cache(*this, STREAM_BUFFER_SIZE) { +RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) + : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system}, + screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { // Create sampler objects for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -225,8 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { if (!vertex_array.IsEnabled()) continue; - const Tegra::GPUVAddr start = vertex_array.StartAddress(); - const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); + const GPUVAddr start = vertex_array.StartAddress(); + const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); ASSERT(end > start); const u64 size = end - start + 1; @@ -320,7 +316,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 GLShader::MaxwellUniformData ubo{}; - ubo.SetFromRegs(gpu.state.shader_stages[stage]); + ubo.SetFromRegs(gpu, stage); const GLintptr offset = buffer_cache.UploadHostMemory( &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); @@ -421,8 +417,8 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { if (!regs.vertex_array[index].IsEnabled()) continue; - const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress(); - const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); + const GPUVAddr start = regs.vertex_array[index].StartAddress(); + const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); ASSERT(end > start); size += end - start + 1; @@ -449,7 +445,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { return boost::make_iterator_range(map.equal_range(interval)); } -void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { +void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { const u64 page_start{addr >> Memory::PAGE_BITS}; const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; @@ -747,20 +743,26 @@ void RasterizerOpenGL::DrawArrays() { void RasterizerOpenGL::FlushAll() {} -void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); + if (!addr || !size) { + return; + } res_cache.FlushRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); + if (!addr || !size) { + return; + } res_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); global_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); } -void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); } @@ -782,7 +784,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, MICROPROFILE_SCOPE(OpenGL_CacheManagement); - const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; + const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))}; if (!surface) { return {}; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ca3de0592..54fbf48aa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -12,15 +12,12 @@ #include <optional> #include <tuple> #include <utility> -#include <vector> #include <boost/icl/interval_map.hpp> -#include <boost/range/iterator_range.hpp> #include <glad/glad.h> #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" @@ -29,10 +26,8 @@ #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" namespace Core { class System; @@ -50,16 +45,15 @@ struct FramebufferCacheKey; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: - explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, - ScreenInfo& info); + explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info); ~RasterizerOpenGL() override; void DrawArrays() override; void Clear() override; void FlushAll() override; - void FlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void FlushRegion(CacheAddr addr, u64 size) override; + void InvalidateRegion(CacheAddr addr, u64 size) override; + void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Common::Rectangle<u32>& src_rect, @@ -67,7 +61,7 @@ public: bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; bool AccelerateDrawBatch(bool is_indexed) override; - void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; + void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; @@ -214,7 +208,6 @@ private: ShaderCacheOpenGL shader_cache; GlobalRegionCacheOpenGL global_cache; - Core::Frontend::EmuWindow& emu_window; Core::System& system; ScreenInfo& screen_info; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index bd1409660..5876145ef 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -13,7 +13,6 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/hle/kernel/process.h" -#include "core/memory.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/morton.h" @@ -55,12 +54,11 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) { } } -void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { +void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) { auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)}; - addr = cpu_addr ? *cpu_addr : 0; gpu_addr = gpu_addr_; + host_ptr = memory_manager.GetPointer(gpu_addr_); size_in_bytes = SizeInBytesRaw(); if (IsPixelFormatASTC(pixel_format)) { @@ -223,7 +221,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, } /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( - u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format, + u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format, u32 block_width, u32 block_height, u32 block_depth, Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { SurfaceParams params{}; @@ -446,7 +444,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), params.MipBlockHeight(mip_level), params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, - gl_buffer.data() + offset_gl, params.addr + offset); + gl_buffer.data() + offset_gl, params.host_ptr + offset); offset += layer_size; offset_gl += gl_size; } @@ -455,7 +453,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), params.MipBlockHeight(mip_level), params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, - gl_buffer.data(), params.addr + offset); + gl_buffer.data(), params.host_ptr + offset); } } @@ -513,9 +511,9 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac "reinterpretation but the texture is tiled."); } const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; - + auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, - Memory::GetPointer(dst_params.addr + src_params.size_in_bytes)); + memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes)); } glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); @@ -563,8 +561,14 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac } CachedSurface::CachedSurface(const SurfaceParams& params) - : params(params), gl_target(SurfaceTargetToGL(params.target)), - cached_size_in_bytes(params.size_in_bytes) { + : RasterizerCacheObject{params.host_ptr}, params{params}, + gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} { + + const auto optional_cpu_addr{ + Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)}; + ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid"); + cpu_addr = *optional_cpu_addr; + texture.Create(gl_target); // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0) @@ -603,19 +607,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params) ApplyTextureDefaults(texture.handle, params.max_mip_level); - OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString()); - - // Clamp size to mapped GPU memory region - // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000 - // R32F render buffer. We do not yet know if this is a game bug or something else, but this - // check is necessary to prevent flushing from overwriting unmapped memory. - - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const u64 max_size{memory_manager.GetRegionEnd(params.gpu_addr) - params.gpu_addr}; - if (cached_size_in_bytes > max_size) { - LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size); - cached_size_in_bytes = max_size; - } + OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString()); } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); @@ -633,10 +625,9 @@ void CachedSurface::LoadGLBuffer() { const u32 bpp = params.GetFormatBpp() / 8; const u32 copy_size = params.width * bpp; if (params.pitch == copy_size) { - std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr), - params.size_in_bytes_gl); + std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl); } else { - const u8* start = Memory::GetPointer(params.addr); + const u8* start{params.host_ptr}; u8* write_to = gl_buffer[0].data(); for (u32 h = params.height; h > 0; h--) { std::memcpy(write_to, start, copy_size); @@ -670,8 +661,8 @@ void CachedSurface::FlushGLBuffer() { gl_buffer[0].resize(GetSizeInBytes()); const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); - // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0); + const u32 align = std::clamp(params.RowAlign(0), 1U, 8U); + glPixelStorei(GL_PACK_ALIGNMENT, align); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); ASSERT(!tuple.compressed); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); @@ -680,8 +671,6 @@ void CachedSurface::FlushGLBuffer() { glPixelStorei(GL_PACK_ROW_LENGTH, 0); Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, params.height, params.depth, true, true); - const u8* const texture_src_data = Memory::GetPointer(params.addr); - ASSERT(texture_src_data); if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", params.block_width, static_cast<u32>(params.target)); @@ -691,9 +680,9 @@ void CachedSurface::FlushGLBuffer() { const u32 bpp = params.GetFormatBpp() / 8; const u32 copy_size = params.width * bpp; if (params.pitch == copy_size) { - std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes()); + std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes()); } else { - u8* start = Memory::GetPointer(params.addr); + u8* start{params.host_ptr}; const u8* read_to = gl_buffer[0].data(); for (u32 h = params.height; h > 0; h--) { std::memcpy(start, read_to, copy_size); @@ -718,8 +707,8 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); - // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0); + const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U); + glPixelStorei(GL_UNPACK_ALIGNMENT, align); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map))); const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false)); @@ -927,12 +916,12 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { } Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { - if (params.addr == 0 || params.height * params.width == 0) { + if (!params.IsValid()) { return {}; } // Look up surface in the cache based on address - Surface surface{TryGet(params.addr)}; + Surface surface{TryGet(params.host_ptr)}; if (surface) { if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { // Use the cached surface as-is unless it's not synced with memory @@ -943,7 +932,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres // If surface parameters changed and we care about keeping the previous data, recreate // the surface from the old one Surface new_surface{RecreateSurface(surface, params)}; - UnregisterSurface(surface); + Unregister(surface); Register(new_surface); if (new_surface->IsUploaded()) { RegisterReinterpretSurface(new_surface); @@ -951,7 +940,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres return new_surface; } else { // Delete the old surface before creating a new one to prevent collisions. - UnregisterSurface(surface); + Unregister(surface); } } @@ -981,14 +970,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface) { const auto& init_params{src_surface->GetSurfaceParams()}; const auto& dst_params{dst_surface->GetSurfaceParams()}; - VAddr address = init_params.addr; - const std::size_t layer_size = dst_params.LayerMemorySize(); + auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; + GPUVAddr address{init_params.gpu_addr}; + const std::size_t layer_size{dst_params.LayerMemorySize()}; for (u32 layer = 0; layer < dst_params.depth; layer++) { for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { - const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); - const Surface& copy = TryGet(sub_address); - if (!copy) + const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)}; + const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))}; + if (!copy) { continue; + } const auto& src_params{copy->GetSurfaceParams()}; const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; @@ -1163,7 +1154,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, const auto& dst_params{dst_surface->GetSurfaceParams()}; // Flush enough memory for both the source and destination surface - FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize())); + FlushRegion(ToCacheAddr(src_params.host_ptr), + std::max(src_params.MemorySize(), dst_params.MemorySize())); LoadSurface(dst_surface); } @@ -1215,8 +1207,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, return new_surface; } -Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { - return TryGet(addr); +Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const { + return TryGet(host_ptr); } void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { @@ -1243,9 +1235,9 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar return {}; } -static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) { - const std::size_t size = params.LayerMemorySize(); - VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap); +static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) { + const std::size_t size{params.LayerMemorySize()}; + GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)}; for (u32 i = 0; i < params.depth; i++) { if (start == addr) { return {i}; @@ -1267,7 +1259,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa src_params.height == dst_params.MipHeight(*level) && src_params.block_height >= dst_params.MipBlockHeight(*level)) { const std::optional<u32> slot = - TryFindBestLayer(render_surface->GetAddr(), dst_params, *level); + TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level); if (slot.has_value()) { glCopyImageSubData(render_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, @@ -1283,8 +1275,8 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa } static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { - const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize(); - const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize(); + const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize(); + const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize(); if (bound2 > bound1) return true; const auto& dst_params = blitted_surface->GetSurfaceParams(); @@ -1302,12 +1294,12 @@ static bool IsReinterpretInvalidSecond(const Surface render_surface, bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface, Surface intersect) { if (IsReinterpretInvalid(triggering_surface, intersect)) { - UnregisterSurface(intersect); + Unregister(intersect); return false; } if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) { if (IsReinterpretInvalidSecond(triggering_surface, intersect)) { - UnregisterSurface(intersect); + Unregister(intersect); return false; } FlushObject(intersect); @@ -1327,7 +1319,8 @@ void RasterizerCacheOpenGL::SignalPreDrawCall() { void RasterizerCacheOpenGL::SignalPostDrawCall() { for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { if (current_color_buffers[i] != nullptr) { - Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr()); + Surface intersect = + CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr()); if (intersect != nullptr) { PartialReinterpretSurface(current_color_buffers[i], intersect); texception = true; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9cf6f50be..db280dbb3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -5,13 +5,13 @@ #pragma once #include <array> -#include <map> #include <memory> #include <string> -#include <unordered_set> +#include <tuple> #include <vector> #include "common/alignment.h" +#include "common/bit_util.h" #include "common/common_types.h" #include "common/hash.h" #include "common/math_util.h" @@ -109,6 +109,11 @@ struct SurfaceParams { return size; } + /// Returns true if the parameters constitute a valid rasterizer surface. + bool IsValid() const { + return gpu_addr && host_ptr && height && width; + } + /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including /// mipmaps. std::size_t LayerMemorySize() const { @@ -201,6 +206,13 @@ struct SurfaceParams { return bd; } + u32 RowAlign(u32 mip_level) const { + const u32 m_width = MipWidth(mip_level); + const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format); + const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel); + return (1U << l2); + } + /// Creates SurfaceParams from a texture configuration static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry); @@ -210,7 +222,7 @@ struct SurfaceParams { /// Creates SurfaceParams for a depth buffer configuration static SurfaceParams CreateForDepthBuffer( - u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format, + u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format, u32 block_width, u32 block_height, u32 block_depth, Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); @@ -232,7 +244,7 @@ struct SurfaceParams { } /// Initializes parameters for caching, should be called after everything has been initialized - void InitCacheParameters(Tegra::GPUVAddr gpu_addr); + void InitCacheParameters(GPUVAddr gpu_addr); std::string TargetName() const { switch (target) { @@ -296,8 +308,8 @@ struct SurfaceParams { bool is_array; bool srgb_conversion; // Parameters used for caching - VAddr addr; - Tegra::GPUVAddr gpu_addr; + u8* host_ptr; + GPUVAddr gpu_addr; std::size_t size_in_bytes; std::size_t size_in_bytes_gl; @@ -345,10 +357,10 @@ class RasterizerOpenGL; class CachedSurface final : public RasterizerCacheObject { public: - CachedSurface(const SurfaceParams& params); + explicit CachedSurface(const SurfaceParams& params); - VAddr GetAddr() const override { - return params.addr; + VAddr GetCpuAddr() const override { + return cpu_addr; } std::size_t GetSizeInBytes() const override { @@ -432,6 +444,7 @@ private: std::size_t memory_size; bool reinterpreted = false; bool must_reload = false; + VAddr cpu_addr{}; }; class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { @@ -449,7 +462,7 @@ public: Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); /// Tries to find a framebuffer using on the provided CPU address - Surface TryFindFramebufferSurface(VAddr addr) const; + Surface TryFindFramebufferSurface(const u8* host_ptr) const; /// Copies the contents of one surface to another void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, @@ -506,12 +519,12 @@ private: std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; Surface last_depth_buffer; - using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>; + using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>; using SurfaceInterval = typename SurfaceIntervalCache::interval_type; static auto GetReinterpretInterval(const Surface& object) { - return SurfaceInterval::right_open(object->GetAddr() + 1, - object->GetAddr() + object->GetMemorySize() - 1); + return SurfaceInterval::right_open(object->GetCacheAddr() + 1, + object->GetCacheAddr() + object->GetMemorySize() - 1); } // Reinterpreted surfaces are very fragil as the game may keep rendering into them. @@ -523,7 +536,7 @@ private: reinterpret_surface->MarkReinterpreted(); } - Surface CollideOnReinterpretedSurface(VAddr addr) const { + Surface CollideOnReinterpretedSurface(CacheAddr addr) const { const SurfaceInterval interval{addr}; for (auto& pair : boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { @@ -532,13 +545,17 @@ private: return nullptr; } + void Register(const Surface& object) override { + RasterizerCache<Surface>::Register(object); + } + /// Unregisters an object from the cache - void UnregisterSurface(const Surface& object) { + void Unregister(const Surface& object) override { if (object->IsReinterpreted()) { auto interval = GetReinterpretInterval(object); reinterpreted_surfaces.erase(interval); } - Unregister(object); + RasterizerCache<Surface>::Unregister(object); } }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4883e4f62..ab381932c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -6,13 +6,11 @@ #include "common/assert.h" #include "common/hash.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/utils.h" #include "video_core/shader/shader_ir.h" @@ -32,19 +30,20 @@ struct UnspecializedShader { namespace { /// Gets the address for the specified shader stage program -VAddr GetShaderAddress(Maxwell::ShaderProgram program) { - const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); - const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; - const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + - shader_config.offset); - ASSERT_MSG(address, "Invalid GPU address"); - return *address; +GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { + const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; + const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; + return gpu.regs.code_address.CodeAddress() + shader_config.offset; } /// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(VAddr addr) { +ProgramCode GetShaderCode(const u8* host_ptr) { ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); - Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); + ASSERT_OR_EXECUTE(host_ptr != nullptr, { + std::fill(program_code.begin(), program_code.end(), 0); + return program_code; + }); + std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64)); return program_code; } @@ -214,12 +213,13 @@ std::set<GLenum> GetSupportedFormats() { } // namespace -CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, - ShaderDiskCacheOpenGL& disk_cache, +CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, + Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, - ProgramCode&& program_code, ProgramCode&& program_code_b) - : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, - disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { + ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) + : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr}, + unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache}, + precompiled_programs{precompiled_programs} { const std::size_t code_size = CalculateProgramSize(program_code); const std::size_t code_size_b = @@ -243,12 +243,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro disk_cache.SaveRaw(raw); } -CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, - ShaderDiskCacheOpenGL& disk_cache, +CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, + Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, - GLShader::ProgramResult result) - : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, - disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { + GLShader::ProgramResult result, u8* host_ptr) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, + program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{ + precompiled_programs} { code = std::move(result.first); entries = result.second; @@ -271,7 +272,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); } - LabelGLObject(GL_PROGRAM, program->handle, addr); + LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); } handle = program->handle; @@ -323,7 +324,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); } - LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name); + LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name); return target_program->handle; }; @@ -486,29 +487,32 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { return last_shaders[static_cast<u32>(program)]; } - const VAddr program_addr{GetShaderAddress(program)}; + auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; + const GPUVAddr program_addr{GetShaderAddress(program)}; // Look up shader in the cache based on address - Shader shader{TryGet(program_addr)}; + const auto& host_ptr{memory_manager.GetPointer(program_addr)}; + Shader shader{TryGet(host_ptr)}; if (!shader) { // No shader found - create a new one - ProgramCode program_code = GetShaderCode(program_addr); + ProgramCode program_code{GetShaderCode(host_ptr)}; ProgramCode program_code_b; if (program == Maxwell::ShaderProgram::VertexA) { - program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)); + program_code_b = GetShaderCode( + memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); } const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); - + const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; const auto found = precompiled_shaders.find(unique_identifier); if (found != precompiled_shaders.end()) { shader = - std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, - precompiled_programs, found->second); + std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, + precompiled_programs, found->second, host_ptr); } else { shader = std::make_shared<CachedShader>( - program_addr, unique_identifier, program, disk_cache, precompiled_programs, - std::move(program_code), std::move(program_code_b)); + cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, + std::move(program_code), std::move(program_code_b), host_ptr); } Register(shader); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 97eed192f..0cf8e0b3d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,21 +5,20 @@ #pragma once #include <array> +#include <atomic> #include <memory> #include <set> #include <tuple> #include <unordered_map> +#include <vector> #include <glad/glad.h> -#include "common/assert.h" #include "common/common_types.h" #include "video_core/rasterizer_cache.h" -#include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" namespace Core { class System; @@ -39,18 +38,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; class CachedShader final : public RasterizerCacheObject { public: - explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, - ShaderDiskCacheOpenGL& disk_cache, + explicit CachedShader(VAddr cpu_addr, u64 unique_identifier, + Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, - ProgramCode&& program_code, ProgramCode&& program_code_b); + ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); - explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, - ShaderDiskCacheOpenGL& disk_cache, + explicit CachedShader(VAddr cpu_addr, u64 unique_identifier, + Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, - GLShader::ProgramResult result); + GLShader::ProgramResult result, u8* host_ptr); - VAddr GetAddr() const override { - return addr; + VAddr GetCpuAddr() const override { + return cpu_addr; } std::size_t GetSizeInBytes() const override { @@ -91,7 +90,8 @@ private: ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; - VAddr addr{}; + u8* host_ptr{}; + VAddr cpu_addr{}; u64 unique_identifier{}; Maxwell::ShaderProgram program_type{}; ShaderDiskCacheOpenGL& disk_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 11d1169f0..3ea08ef7b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -21,6 +21,8 @@ namespace OpenGL::GLShader { +namespace { + using Tegra::Shader::Attribute; using Tegra::Shader::AttributeUse; using Tegra::Shader::Header; @@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; using Operation = const OperationNode&; +enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; + +struct TextureAoffi {}; +using TextureArgument = std::pair<Type, Node>; +using TextureIR = std::variant<TextureAoffi, TextureArgument>; + enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); -enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; - class ShaderWriter { public: void AddExpression(std::string_view text) { @@ -69,10 +75,10 @@ public: shader_source += '\n'; } - std::string GenerateTemporal() { - std::string temporal = "tmp"; - temporal += std::to_string(temporal_index++); - return temporal; + std::string GenerateTemporary() { + std::string temporary = "tmp"; + temporary += std::to_string(temporary_index++); + return temporary; } std::string GetResult() { @@ -87,11 +93,11 @@ private: } std::string shader_source; - u32 temporal_index = 1; + u32 temporary_index = 1; }; /// Generates code to use for a swizzle operation. -static std::string GetSwizzle(u32 elem) { +std::string GetSwizzle(u32 elem) { ASSERT(elem <= 3); std::string swizzle = "."; swizzle += "xyzw"[elem]; @@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) { } /// Translate topology -static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { +std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { switch (topology) { case Tegra::Shader::OutputTopology::PointList: return "points"; @@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { } /// Returns true if an object has to be treated as precise -static bool IsPrecise(Operation operand) { +bool IsPrecise(Operation operand) { const auto& meta = operand.GetMeta(); if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { @@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) { return false; } -static bool IsPrecise(Node node) { +bool IsPrecise(Node node) { if (const auto operation = std::get_if<OperationNode>(node)) { return IsPrecise(*operation); } @@ -426,9 +432,14 @@ private: std::string Visit(Node node) { if (const auto operation = std::get_if<OperationNode>(node)) { const auto operation_index = static_cast<std::size_t>(operation->GetCode()); + if (operation_index >= operation_decompilers.size()) { + UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); + return {}; + } const auto decompiler = operation_decompilers[operation_index]; if (decompiler == nullptr) { - UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); + UNREACHABLE_MSG("Undefined operation: {}", operation_index); + return {}; } return (this->*decompiler)(*operation); @@ -540,7 +551,7 @@ private: } else if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access - const std::string final_offset = code.GenerateTemporal(); + const std::string final_offset = code.GenerateTemporary(); code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " + std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';'); return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), @@ -587,9 +598,9 @@ private: // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; - const std::string temporal = code.GenerateTemporal(); - code.AddLine(precise + "float " + temporal + " = " + value + ';'); - return temporal; + const std::string temporary = code.GenerateTemporary(); + code.AddLine(precise + "float " + temporary + " = " + value + ';'); + return temporary; } std::string VisitOperand(Operation operation, std::size_t operand_index) { @@ -601,9 +612,9 @@ private: return Visit(operand); } - const std::string temporal = code.GenerateTemporal(); - code.AddLine("float " + temporal + " = " + Visit(operand) + ';'); - return temporal; + const std::string temporary = code.GenerateTemporary(); + code.AddLine("float " + temporary + " = " + Visit(operand) + ';'); + return temporary; } std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { @@ -718,8 +729,8 @@ private: result_type)); } - std::string GenerateTexture(Operation operation, const std::string& func, - const std::vector<std::pair<Type, Node>>& extras) { + std::string GenerateTexture(Operation operation, const std::string& function_suffix, + const std::vector<TextureIR>& extras) { constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); @@ -729,11 +740,11 @@ private: const bool has_array = meta->sampler.IsArray(); const bool has_shadow = meta->sampler.IsShadow(); - std::string expr = func; - expr += '('; - expr += GetSampler(meta->sampler); - expr += ", "; - + std::string expr = "texture" + function_suffix; + if (!meta->aoffi.empty()) { + expr += "Offset"; + } + expr += '(' + GetSampler(meta->sampler) + ", "; expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); expr += '('; for (std::size_t i = 0; i < count; ++i) { @@ -751,36 +762,74 @@ private: } expr += ')'; - for (const auto& extra_pair : extras) { - const auto [type, operand] = extra_pair; - if (operand == nullptr) { - continue; + for (const auto& variant : extras) { + if (const auto argument = std::get_if<TextureArgument>(&variant)) { + expr += GenerateTextureArgument(*argument); + } else if (std::get_if<TextureAoffi>(&variant)) { + expr += GenerateTextureAoffi(meta->aoffi); + } else { + UNREACHABLE(); } - expr += ", "; + } - switch (type) { - case Type::Int: - if (const auto immediate = std::get_if<ImmediateNode>(operand)) { - // Inline the string as an immediate integer in GLSL (some extra arguments are - // required to be constant) - expr += std::to_string(static_cast<s32>(immediate->GetValue())); - } else { - expr += "ftoi(" + Visit(operand) + ')'; - } - break; - case Type::Float: - expr += Visit(operand); - break; - default: { - const auto type_int = static_cast<u32>(type); - UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); - expr += '0'; - break; + return expr + ')'; + } + + std::string GenerateTextureArgument(TextureArgument argument) { + const auto [type, operand] = argument; + if (operand == nullptr) { + return {}; + } + + std::string expr = ", "; + switch (type) { + case Type::Int: + if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + // Inline the string as an immediate integer in GLSL (some extra arguments are + // required to be constant) + expr += std::to_string(static_cast<s32>(immediate->GetValue())); + } else { + expr += "ftoi(" + Visit(operand) + ')'; } + break; + case Type::Float: + expr += Visit(operand); + break; + default: { + const auto type_int = static_cast<u32>(type); + UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); + expr += '0'; + break; + } + } + return expr; + } + + std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { + if (aoffi.empty()) { + return {}; + } + constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"}; + std::string expr = ", "; + expr += coord_constructors.at(aoffi.size() - 1); + expr += '('; + + for (std::size_t index = 0; index < aoffi.size(); ++index) { + const auto operand{aoffi.at(index)}; + if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + // Inline the string as an immediate integer in GLSL (AOFFI arguments are required + // to be constant by the standard). + expr += std::to_string(static_cast<s32>(immediate->GetValue())); + } else { + expr += "ftoi(" + Visit(operand) + ')'; + } + if (index + 1 < aoffi.size()) { + expr += ", "; } } + expr += ')'; - return expr + ')'; + return expr; } std::string Assign(Operation operation) { @@ -1159,7 +1208,8 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); + std::string expr = GenerateTexture( + operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1170,7 +1220,8 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); + std::string expr = GenerateTexture( + operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1182,7 +1233,8 @@ private: ASSERT(meta); const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; - return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + + return GenerateTexture(operation, "Gather", + {TextureArgument{type, meta->component}, TextureAoffi{}}) + GetSwizzle(meta->element); } @@ -1196,11 +1248,12 @@ private: switch (meta->element) { case 0: case 1: - return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta->element); + return "itof(int(textureSize(" + sampler + ", " + lod + ')' + + GetSwizzle(meta->element) + "))"; case 2: return "0"; case 3: - return "textureQueryLevels(" + sampler + ')'; + return "itof(textureQueryLevels(" + sampler + "))"; } UNREACHABLE(); return "0"; @@ -1211,8 +1264,8 @@ private: ASSERT(meta); if (meta->element < 2) { - return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + - " * vec2(256))" + GetSwizzle(meta->element) + "))"; + return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" + + GetSwizzle(meta->element) + "))"; } return "0"; } @@ -1565,6 +1618,8 @@ private: ShaderWriter code; }; +} // Anonymous namespace + std::string GetCommonDeclarations() { const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 72aca4938..4e04ab2f8 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -5,7 +5,6 @@ #pragma once #include <array> -#include <set> #include <string> #include <utility> #include <vector> diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 82fc4d44b..8a43eb157 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -4,7 +4,6 @@ #include <cstring> #include <fmt/format.h> -#include <lz4.h> #include "common/assert.h" #include "common/common_paths.h" @@ -12,6 +11,7 @@ #include "common/file_util.h" #include "common/logging/log.h" #include "common/scm_rev.h" +#include "common/zstd_compression.h" #include "core/core.h" #include "core/hle/kernel/process.h" @@ -49,39 +49,6 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { return hash; } -template <typename T> -std::vector<u8> CompressData(const T* source, std::size_t source_size) { - if (source_size > LZ4_MAX_INPUT_SIZE) { - // Source size exceeds LZ4 maximum input size - return {}; - } - const auto source_size_int = static_cast<int>(source_size); - const int max_compressed_size = LZ4_compressBound(source_size_int); - std::vector<u8> compressed(max_compressed_size); - const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source), - reinterpret_cast<char*>(compressed.data()), - source_size_int, max_compressed_size); - if (compressed_size <= 0) { - // Compression failed - return {}; - } - compressed.resize(compressed_size); - return compressed; -} - -std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) { - std::vector<u8> uncompressed(uncompressed_size); - const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()), - reinterpret_cast<char*>(uncompressed.data()), - static_cast<int>(compressed.size()), - static_cast<int>(uncompressed.size())); - if (static_cast<int>(uncompressed_size) != size_check) { - // Decompression failed - return {}; - } - return uncompressed; -} - } // namespace ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, @@ -292,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - dump.binary = DecompressData(compressed_binary, binary_length); + dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary); if (dump.binary.empty()) { return {}; } @@ -321,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn return {}; } - const std::vector<u8> code = DecompressData(compressed_code, code_size); + const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code); if (code.empty()) { return {}; } @@ -507,7 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str if (!IsUsable()) return; - const std::vector<u8> compressed_code{CompressData(code.data(), code.size())}; + const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault( + reinterpret_cast<const u8*>(code.data()), code.size())}; if (compressed_code.empty()) { LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}", unique_identifier); @@ -537,7 +505,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p std::vector<u8> binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size()); + const std::vector<u8> compressed_binary = + Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size()); + if (compressed_binary.empty()) { LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}", usage.unique_identifier); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 7d96649af..8763d9c71 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include <fmt/format.h> -#include "common/assert.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_gen.h" diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index fba8e681b..fad346b48 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -4,12 +4,9 @@ #pragma once -#include <array> -#include <string> #include <vector> #include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 6a30c28d2..eaf3e03a0 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -2,15 +2,15 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "core/core.h" #include "video_core/renderer_opengl/gl_shader_manager.h" namespace OpenGL::GLShader { -void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { - const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); - const auto& regs = gpu.regs; - const auto& state = gpu.state; +using Tegra::Engines::Maxwell3D; + +void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) { + const auto& regs = maxwell.regs; + const auto& state = maxwell.state; // TODO(bunnei): Support more than one viewport viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; @@ -18,7 +18,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh u32 func = static_cast<u32>(regs.alpha_test_func); // Normalize the gl variants of opCompare to be the same as the normal variants - u32 op_gl_variant_base = static_cast<u32>(Tegra::Engines::Maxwell3D::Regs::ComparisonOp::Never); + const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never); if (func >= op_gl_variant_base) { func = func - op_gl_variant_base + 1U; } @@ -31,8 +31,9 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh // Assign in which stage the position has to be flipped // (the last stage before the fragment shader). - if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) { - flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); + constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); + if (maxwell.regs.shader_config[geometry_index].enable) { + flip_stage = geometry_index; } else { flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 4970aafed..37dcfefdb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -12,14 +12,13 @@ namespace OpenGL::GLShader { -using Tegra::Engines::Maxwell3D; - /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -// Not following that rule will cause problems on some AMD drivers. +/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at +/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. +/// Not following that rule will cause problems on some AMD drivers. struct MaxwellUniformData { - void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); + void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage); + alignas(16) GLvec4 viewport_flip; struct alignas(16) { GLuint instance_id; @@ -63,7 +62,6 @@ public: UpdatePipeline(); state.draw.shader_program = 0; state.draw.program_pipeline = pipeline.handle; - state.geometry_shaders.enabled = (gs != 0); } private: diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 9419326a3..52d569a1b 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -10,16 +10,62 @@ namespace OpenGL { -OpenGLState OpenGLState::cur_state; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; +OpenGLState OpenGLState::cur_state; bool OpenGLState::s_rgb_used; +namespace { + +template <typename T> +bool UpdateValue(T& current_value, const T new_value) { + const bool changed = current_value != new_value; + current_value = new_value; + return changed; +} + +template <typename T1, typename T2> +bool UpdateTie(T1 current_value, const T2 new_value) { + const bool changed = current_value != new_value; + current_value = new_value; + return changed; +} + +void Enable(GLenum cap, bool enable) { + if (enable) { + glEnable(cap); + } else { + glDisable(cap); + } +} + +void Enable(GLenum cap, GLuint index, bool enable) { + if (enable) { + glEnablei(cap, index); + } else { + glDisablei(cap, index); + } +} + +void Enable(GLenum cap, bool& current_value, bool new_value) { + if (UpdateValue(current_value, new_value)) + Enable(cap, new_value); +} + +void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) { + if (UpdateValue(current_value, new_value)) + Enable(cap, index, new_value); +} + +} // namespace + OpenGLState::OpenGLState() { // These all match default OpenGL values - geometry_shaders.enabled = false; framebuffer_srgb.enabled = false; + multisample_control.alpha_to_coverage = false; multisample_control.alpha_to_one = false; + cull.enabled = false; cull.mode = GL_BACK; cull.front_face = GL_CCW; @@ -30,14 +76,15 @@ OpenGLState::OpenGLState() { primitive_restart.enabled = false; primitive_restart.index = 0; + for (auto& item : color_mask) { item.red_enabled = GL_TRUE; item.green_enabled = GL_TRUE; item.blue_enabled = GL_TRUE; item.alpha_enabled = GL_TRUE; } - stencil.test_enabled = false; - auto reset_stencil = [](auto& config) { + + const auto ResetStencil = [](auto& config) { config.test_func = GL_ALWAYS; config.test_ref = 0; config.test_mask = 0xFFFFFFFF; @@ -46,8 +93,10 @@ OpenGLState::OpenGLState() { config.action_depth_pass = GL_KEEP; config.action_stencil_fail = GL_KEEP; }; - reset_stencil(stencil.front); - reset_stencil(stencil.back); + stencil.test_enabled = false; + ResetStencil(stencil.front); + ResetStencil(stencil.back); + for (auto& item : viewports) { item.x = 0; item.y = 0; @@ -61,6 +110,7 @@ OpenGLState::OpenGLState() { item.scissor.width = 0; item.scissor.height = 0; } + for (auto& item : blend) { item.enabled = true; item.rgb_equation = GL_FUNC_ADD; @@ -70,11 +120,14 @@ OpenGLState::OpenGLState() { item.src_a_func = GL_ONE; item.dst_a_func = GL_ZERO; } + independant_blend.enabled = false; + blend_color.red = 0.0f; blend_color.green = 0.0f; blend_color.blue = 0.0f; blend_color.alpha = 0.0f; + logic_op.enabled = false; logic_op.operation = GL_COPY; @@ -91,9 +144,12 @@ OpenGLState::OpenGLState() { clip_distance = {}; point.size = 1; + fragment_color_clamp.enabled = false; + depth_clamp.far_plane = false; depth_clamp.near_plane = false; + polygon_offset.fill_enable = false; polygon_offset.line_enable = false; polygon_offset.point_enable = false; @@ -103,260 +159,255 @@ OpenGLState::OpenGLState() { } void OpenGLState::ApplyDefaultState() { + glEnable(GL_BLEND); glDisable(GL_FRAMEBUFFER_SRGB); glDisable(GL_CULL_FACE); glDisable(GL_DEPTH_TEST); glDisable(GL_PRIMITIVE_RESTART); glDisable(GL_STENCIL_TEST); - glEnable(GL_BLEND); glDisable(GL_COLOR_LOGIC_OP); glDisable(GL_SCISSOR_TEST); } +void OpenGLState::ApplyFramebufferState() const { + if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) { + glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); + } + if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); + } +} + +void OpenGLState::ApplyVertexArrayState() const { + if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) { + glBindVertexArray(draw.vertex_array); + } +} + +void OpenGLState::ApplyShaderProgram() const { + if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) { + glUseProgram(draw.shader_program); + } +} + +void OpenGLState::ApplyProgramPipeline() const { + if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) { + glBindProgramPipeline(draw.program_pipeline); + } +} + +void OpenGLState::ApplyClipDistances() const { + for (std::size_t i = 0; i < clip_distance.size(); ++i) { + Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i], + clip_distance[i]); + } +} + +void OpenGLState::ApplyPointSize() const { + if (UpdateValue(cur_state.point.size, point.size)) { + glPointSize(point.size); + } +} + +void OpenGLState::ApplyFragmentColorClamp() const { + if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) { + glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, + fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); + } +} + +void OpenGLState::ApplyMultisample() const { + Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage, + multisample_control.alpha_to_coverage); + Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one, + multisample_control.alpha_to_one); +} + +void OpenGLState::ApplyDepthClamp() const { + if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && + depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { + return; + } + cur_state.depth_clamp = depth_clamp; + + UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, + "Unimplemented Depth Clamp Separation!"); + + Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane); +} + void OpenGLState::ApplySRgb() const { - if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { - if (framebuffer_srgb.enabled) { - // Track if sRGB is used - s_rgb_used = true; - glEnable(GL_FRAMEBUFFER_SRGB); - } else { - glDisable(GL_FRAMEBUFFER_SRGB); - } + if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled) + return; + cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled; + if (framebuffer_srgb.enabled) { + // Track if sRGB is used + s_rgb_used = true; + glEnable(GL_FRAMEBUFFER_SRGB); + } else { + glDisable(GL_FRAMEBUFFER_SRGB); } } void OpenGLState::ApplyCulling() const { - if (cull.enabled != cur_state.cull.enabled) { - if (cull.enabled) { - glEnable(GL_CULL_FACE); - } else { - glDisable(GL_CULL_FACE); - } - } + Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled); - if (cull.mode != cur_state.cull.mode) { + if (UpdateValue(cur_state.cull.mode, cull.mode)) { glCullFace(cull.mode); } - if (cull.front_face != cur_state.cull.front_face) { + if (UpdateValue(cur_state.cull.front_face, cull.front_face)) { glFrontFace(cull.front_face); } } void OpenGLState::ApplyColorMask() const { - if (independant_blend.enabled) { - for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - const auto& updated = color_mask[i]; - const auto& current = cur_state.color_mask[i]; - if (updated.red_enabled != current.red_enabled || - updated.green_enabled != current.green_enabled || - updated.blue_enabled != current.blue_enabled || - updated.alpha_enabled != current.alpha_enabled) { - glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, - updated.blue_enabled, updated.alpha_enabled); - } - } - } else { - const auto& updated = color_mask[0]; - const auto& current = cur_state.color_mask[0]; + for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { + const auto& updated = color_mask[i]; + auto& current = cur_state.color_mask[i]; if (updated.red_enabled != current.red_enabled || updated.green_enabled != current.green_enabled || updated.blue_enabled != current.blue_enabled || updated.alpha_enabled != current.alpha_enabled) { - glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled, - updated.alpha_enabled); + current = updated; + glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, + updated.blue_enabled, updated.alpha_enabled); } } } void OpenGLState::ApplyDepth() const { - if (depth.test_enabled != cur_state.depth.test_enabled) { - if (depth.test_enabled) { - glEnable(GL_DEPTH_TEST); - } else { - glDisable(GL_DEPTH_TEST); - } - } + Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled); - if (depth.test_func != cur_state.depth.test_func) { + if (cur_state.depth.test_func != depth.test_func) { + cur_state.depth.test_func = depth.test_func; glDepthFunc(depth.test_func); } - if (depth.write_mask != cur_state.depth.write_mask) { + if (cur_state.depth.write_mask != depth.write_mask) { + cur_state.depth.write_mask = depth.write_mask; glDepthMask(depth.write_mask); } } void OpenGLState::ApplyPrimitiveRestart() const { - if (primitive_restart.enabled != cur_state.primitive_restart.enabled) { - if (primitive_restart.enabled) { - glEnable(GL_PRIMITIVE_RESTART); - } else { - glDisable(GL_PRIMITIVE_RESTART); - } - } + Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled); - if (primitive_restart.index != cur_state.primitive_restart.index) { + if (cur_state.primitive_restart.index != primitive_restart.index) { + cur_state.primitive_restart.index = primitive_restart.index; glPrimitiveRestartIndex(primitive_restart.index); } } void OpenGLState::ApplyStencilTest() const { - if (stencil.test_enabled != cur_state.stencil.test_enabled) { - if (stencil.test_enabled) { - glEnable(GL_STENCIL_TEST); - } else { - glDisable(GL_STENCIL_TEST); - } - } - - const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) { - if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref || - config.test_mask != prev_config.test_mask) { + Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled); + + const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) { + if (current.test_func != config.test_func || current.test_ref != config.test_ref || + current.test_mask != config.test_mask) { + current.test_func = config.test_func; + current.test_ref = config.test_ref; + current.test_mask = config.test_mask; glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); } - if (config.action_depth_fail != prev_config.action_depth_fail || - config.action_depth_pass != prev_config.action_depth_pass || - config.action_stencil_fail != prev_config.action_stencil_fail) { + if (current.action_depth_fail != config.action_depth_fail || + current.action_depth_pass != config.action_depth_pass || + current.action_stencil_fail != config.action_stencil_fail) { + current.action_depth_fail = config.action_depth_fail; + current.action_depth_pass = config.action_depth_pass; + current.action_stencil_fail = config.action_stencil_fail; glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, config.action_depth_pass); } - if (config.write_mask != prev_config.write_mask) { + if (current.write_mask != config.write_mask) { + current.write_mask = config.write_mask; glStencilMaskSeparate(face, config.write_mask); } }; ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front); ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); } -// Viewport does not affects glClearBuffer so emulate viewport using scissor test -void OpenGLState::EmulateViewportWithScissor() { - auto& current = viewports[0]; - if (current.scissor.enabled) { - const GLint left = std::max(current.x, current.scissor.x); - const GLint right = - std::max(current.x + current.width, current.scissor.x + current.scissor.width); - const GLint bottom = std::max(current.y, current.scissor.y); - const GLint top = - std::max(current.y + current.height, current.scissor.y + current.scissor.height); - current.scissor.x = std::max(left, 0); - current.scissor.y = std::max(bottom, 0); - current.scissor.width = std::max(right - left, 0); - current.scissor.height = std::max(top - bottom, 0); - } else { - current.scissor.enabled = true; - current.scissor.x = current.x; - current.scissor.y = current.y; - current.scissor.width = current.width; - current.scissor.height = current.height; - } -} void OpenGLState::ApplyViewport() const { - if (geometry_shaders.enabled) { - for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports); - i++) { - const auto& current = cur_state.viewports[i]; - const auto& updated = viewports[i]; - if (updated.x != current.x || updated.y != current.y || - updated.width != current.width || updated.height != current.height) { - glViewportIndexedf( - i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), - static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height)); - } - if (updated.depth_range_near != current.depth_range_near || - updated.depth_range_far != current.depth_range_far) { - glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); - } - - if (updated.scissor.enabled != current.scissor.enabled) { - if (updated.scissor.enabled) { - glEnablei(GL_SCISSOR_TEST, i); - } else { - glDisablei(GL_SCISSOR_TEST, i); - } - } - - if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || - updated.scissor.width != current.scissor.width || - updated.scissor.height != current.scissor.height) { - glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, - updated.scissor.height); - } - } - } else { - const auto& current = cur_state.viewports[0]; - const auto& updated = viewports[0]; - if (updated.x != current.x || updated.y != current.y || updated.width != current.width || - updated.height != current.height) { - glViewport(updated.x, updated.y, updated.width, updated.height); - } - - if (updated.depth_range_near != current.depth_range_near || - updated.depth_range_far != current.depth_range_far) { - glDepthRange(updated.depth_range_near, updated.depth_range_far); + for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) { + const auto& updated = viewports[i]; + auto& current = cur_state.viewports[i]; + + if (current.x != updated.x || current.y != updated.y || current.width != updated.width || + current.height != updated.height) { + current.x = updated.x; + current.y = updated.y; + current.width = updated.width; + current.height = updated.height; + glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), + static_cast<GLfloat>(updated.width), + static_cast<GLfloat>(updated.height)); } - - if (updated.scissor.enabled != current.scissor.enabled) { - if (updated.scissor.enabled) { - glEnable(GL_SCISSOR_TEST); - } else { - glDisable(GL_SCISSOR_TEST); - } + if (current.depth_range_near != updated.depth_range_near || + current.depth_range_far != updated.depth_range_far) { + current.depth_range_near = updated.depth_range_near; + current.depth_range_far = updated.depth_range_far; + glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); } - if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || - updated.scissor.width != current.scissor.width || - updated.scissor.height != current.scissor.height) { - glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width, - updated.scissor.height); + Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled); + + if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y || + current.scissor.width != updated.scissor.width || + current.scissor.height != updated.scissor.height) { + current.scissor.x = updated.scissor.x; + current.scissor.y = updated.scissor.y; + current.scissor.width = updated.scissor.width; + current.scissor.height = updated.scissor.height; + glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, + updated.scissor.height); } } } void OpenGLState::ApplyGlobalBlending() const { - const Blend& current = cur_state.blend[0]; const Blend& updated = blend[0]; - if (updated.enabled != current.enabled) { - if (updated.enabled) { - glEnable(GL_BLEND); - } else { - glDisable(GL_BLEND); - } - } - if (!updated.enabled) { - return; - } - if (updated.src_rgb_func != current.src_rgb_func || - updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || - updated.dst_a_func != current.dst_a_func) { + Blend& current = cur_state.blend[0]; + + Enable(GL_BLEND, current.enabled, updated.enabled); + + if (current.src_rgb_func != updated.src_rgb_func || + current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func || + current.dst_a_func != updated.dst_a_func) { + current.src_rgb_func = updated.src_rgb_func; + current.dst_rgb_func = updated.dst_rgb_func; + current.src_a_func = updated.src_a_func; + current.dst_a_func = updated.dst_a_func; glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); } - if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { + if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) { + current.rgb_equation = updated.rgb_equation; + current.a_equation = updated.a_equation; glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); } } void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { const Blend& updated = blend[target]; - const Blend& current = cur_state.blend[target]; - if (updated.enabled != current.enabled || force) { - if (updated.enabled) { - glEnablei(GL_BLEND, static_cast<GLuint>(target)); - } else { - glDisablei(GL_BLEND, static_cast<GLuint>(target)); - } + Blend& current = cur_state.blend[target]; + + if (current.enabled != updated.enabled || force) { + current.enabled = updated.enabled; + Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled); } - if (updated.src_rgb_func != current.src_rgb_func || - updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || - updated.dst_a_func != current.dst_a_func) { + if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func, + current.dst_a_func), + std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, + updated.dst_a_func))) { glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); } - if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { + if (UpdateTie(std::tie(current.rgb_equation, current.a_equation), + std::tie(updated.rgb_equation, updated.a_equation))) { glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, updated.a_equation); } @@ -364,77 +415,48 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { void OpenGLState::ApplyBlending() const { if (independant_blend.enabled) { - for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - ApplyTargetBlending(i, - independant_blend.enabled != cur_state.independant_blend.enabled); + const bool force = independant_blend.enabled != cur_state.independant_blend.enabled; + for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) { + ApplyTargetBlending(target, force); } } else { ApplyGlobalBlending(); } - if (blend_color.red != cur_state.blend_color.red || - blend_color.green != cur_state.blend_color.green || - blend_color.blue != cur_state.blend_color.blue || - blend_color.alpha != cur_state.blend_color.alpha) { + cur_state.independant_blend.enabled = independant_blend.enabled; + + if (UpdateTie( + std::tie(cur_state.blend_color.red, cur_state.blend_color.green, + cur_state.blend_color.blue, cur_state.blend_color.alpha), + std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) { glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha); } } void OpenGLState::ApplyLogicOp() const { - if (logic_op.enabled != cur_state.logic_op.enabled) { - if (logic_op.enabled) { - glEnable(GL_COLOR_LOGIC_OP); - } else { - glDisable(GL_COLOR_LOGIC_OP); - } - } + Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled); - if (logic_op.operation != cur_state.logic_op.operation) { + if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) { glLogicOp(logic_op.operation); } } void OpenGLState::ApplyPolygonOffset() const { - const bool fill_enable_changed = - polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; - const bool line_enable_changed = - polygon_offset.line_enable != cur_state.polygon_offset.line_enable; - const bool point_enable_changed = - polygon_offset.point_enable != cur_state.polygon_offset.point_enable; - const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor; - const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units; - const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp; - - if (fill_enable_changed) { - if (polygon_offset.fill_enable) { - glEnable(GL_POLYGON_OFFSET_FILL); - } else { - glDisable(GL_POLYGON_OFFSET_FILL); - } - } - - if (line_enable_changed) { - if (polygon_offset.line_enable) { - glEnable(GL_POLYGON_OFFSET_LINE); - } else { - glDisable(GL_POLYGON_OFFSET_LINE); - } - } - - if (point_enable_changed) { - if (polygon_offset.point_enable) { - glEnable(GL_POLYGON_OFFSET_POINT); - } else { - glDisable(GL_POLYGON_OFFSET_POINT); - } - } - - if (factor_changed || units_changed || clamp_changed) { + Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable, + polygon_offset.fill_enable); + Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable, + polygon_offset.line_enable); + Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable, + polygon_offset.point_enable); + + if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units, + cur_state.polygon_offset.clamp), + std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) { if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); } else { - glPolygonOffset(polygon_offset.factor, polygon_offset.units); UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, "Unimplemented Depth polygon offset clamp."); + glPolygonOffset(polygon_offset.factor, polygon_offset.units); } } } @@ -443,22 +465,21 @@ void OpenGLState::ApplyTextures() const { bool has_delta{}; std::size_t first{}; std::size_t last{}; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures; + std::array<GLuint, Maxwell::NumTextureSamplers> textures; for (std::size_t i = 0; i < std::size(texture_units); ++i) { const auto& texture_unit = texture_units[i]; - const auto& cur_state_texture_unit = cur_state.texture_units[i]; + auto& cur_state_texture_unit = cur_state.texture_units[i]; textures[i] = texture_unit.texture; - - if (textures[i] != cur_state_texture_unit.texture) { - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; + if (cur_state_texture_unit.texture == textures[i]) + continue; + cur_state_texture_unit.texture = textures[i]; + if (!has_delta) { + first = i; + has_delta = true; } + last = i; } - if (has_delta) { glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), textures.data() + first); @@ -469,16 +490,18 @@ void OpenGLState::ApplySamplers() const { bool has_delta{}; std::size_t first{}; std::size_t last{}; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers; + std::array<GLuint, Maxwell::NumTextureSamplers> samplers; + for (std::size_t i = 0; i < std::size(samplers); ++i) { + if (cur_state.texture_units[i].sampler == texture_units[i].sampler) + continue; + cur_state.texture_units[i].sampler = texture_units[i].sampler; samplers[i] = texture_units[i].sampler; - if (samplers[i] != cur_state.texture_units[i].sampler) { - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; + if (!has_delta) { + first = i; + has_delta = true; } + last = i; } if (has_delta) { glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), @@ -486,81 +509,15 @@ void OpenGLState::ApplySamplers() const { } } -void OpenGLState::ApplyFramebufferState() const { - if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); - } - if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) { - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); - } -} - -void OpenGLState::ApplyVertexArrayState() const { - if (draw.vertex_array != cur_state.draw.vertex_array) { - glBindVertexArray(draw.vertex_array); - } -} - -void OpenGLState::ApplyDepthClamp() const { - if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && - depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { - return; - } - UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, - "Unimplemented Depth Clamp Separation!"); - - if (depth_clamp.far_plane || depth_clamp.near_plane) { - glEnable(GL_DEPTH_CLAMP); - } else { - glDisable(GL_DEPTH_CLAMP); - } -} - void OpenGLState::Apply() const { ApplyFramebufferState(); ApplyVertexArrayState(); - - // Shader program - if (draw.shader_program != cur_state.draw.shader_program) { - glUseProgram(draw.shader_program); - } - - // Program pipeline - if (draw.program_pipeline != cur_state.draw.program_pipeline) { - glBindProgramPipeline(draw.program_pipeline); - } - // Clip distance - for (std::size_t i = 0; i < clip_distance.size(); ++i) { - if (clip_distance[i] != cur_state.clip_distance[i]) { - if (clip_distance[i]) { - glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); - } else { - glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); - } - } - } - // Point - if (point.size != cur_state.point.size) { - glPointSize(point.size); - } - if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) { - glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, - fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); - } - if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) { - if (multisample_control.alpha_to_coverage) { - glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE); - } else { - glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE); - } - } - if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) { - if (multisample_control.alpha_to_one) { - glEnable(GL_SAMPLE_ALPHA_TO_ONE); - } else { - glDisable(GL_SAMPLE_ALPHA_TO_ONE); - } - } + ApplyShaderProgram(); + ApplyProgramPipeline(); + ApplyClipDistances(); + ApplyPointSize(); + ApplyFragmentColorClamp(); + ApplyMultisample(); ApplyDepthClamp(); ApplyColorMask(); ApplyViewport(); @@ -574,7 +531,28 @@ void OpenGLState::Apply() const { ApplyTextures(); ApplySamplers(); ApplyPolygonOffset(); - cur_state = *this; +} + +void OpenGLState::EmulateViewportWithScissor() { + auto& current = viewports[0]; + if (current.scissor.enabled) { + const GLint left = std::max(current.x, current.scissor.x); + const GLint right = + std::max(current.x + current.width, current.scissor.x + current.scissor.width); + const GLint bottom = std::max(current.y, current.scissor.y); + const GLint top = + std::max(current.y + current.height, current.scissor.y + current.scissor.height); + current.scissor.x = std::max(left, 0); + current.scissor.y = std::max(bottom, 0); + current.scissor.width = std::max(right - left, 0); + current.scissor.height = std::max(top - bottom, 0); + } else { + current.scissor.enabled = true; + current.scissor.x = current.x; + current.scissor.y = current.y; + current.scissor.width = current.width; + current.scissor.height = current.height; + } } OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 9e1eda5b1..41418a7b8 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -54,10 +54,6 @@ public: } depth_clamp; // GL_DEPTH_CLAMP struct { - bool enabled; // viewports arrays are only supported when geometry shaders are enabled. - } geometry_shaders; - - struct { bool enabled; // GL_CULL_FACE GLenum mode; // GL_CULL_FACE_MODE GLenum front_face; // GL_FRONT_FACE @@ -184,34 +180,26 @@ public: static OpenGLState GetCurState() { return cur_state; } + static bool GetsRGBUsed() { return s_rgb_used; } + static void ClearsRGBUsed() { s_rgb_used = false; } + /// Apply this state as the current OpenGL state void Apply() const; - /// Apply only the state affecting the framebuffer + void ApplyFramebufferState() const; - /// Apply only the state affecting the vertex array void ApplyVertexArrayState() const; - /// Set the initial OpenGL state - static void ApplyDefaultState(); - /// Resets any references to the given resource - OpenGLState& UnbindTexture(GLuint handle); - OpenGLState& ResetSampler(GLuint handle); - OpenGLState& ResetProgram(GLuint handle); - OpenGLState& ResetPipeline(GLuint handle); - OpenGLState& ResetVertexArray(GLuint handle); - OpenGLState& ResetFramebuffer(GLuint handle); - void EmulateViewportWithScissor(); - -private: - static OpenGLState cur_state; - // Workaround for sRGB problems caused by - // QT not supporting srgb output - static bool s_rgb_used; + void ApplyShaderProgram() const; + void ApplyProgramPipeline() const; + void ApplyClipDistances() const; + void ApplyPointSize() const; + void ApplyFragmentColorClamp() const; + void ApplyMultisample() const; void ApplySRgb() const; void ApplyCulling() const; void ApplyColorMask() const; @@ -227,6 +215,26 @@ private: void ApplySamplers() const; void ApplyDepthClamp() const; void ApplyPolygonOffset() const; + + /// Set the initial OpenGL state + static void ApplyDefaultState(); + + /// Resets any references to the given resource + OpenGLState& UnbindTexture(GLuint handle); + OpenGLState& ResetSampler(GLuint handle); + OpenGLState& ResetProgram(GLuint handle); + OpenGLState& ResetPipeline(GLuint handle); + OpenGLState& ResetVertexArray(GLuint handle); + OpenGLState& ResetFramebuffer(GLuint handle); + + /// Viewport does not affects glClearBuffer so emulate viewport using scissor test + void EmulateViewportWithScissor(); + +private: + static OpenGLState cur_state; + + // Workaround for sRGB problems caused by QT not supporting srgb output + static bool s_rgb_used; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b97576309..d69cba9c3 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -5,7 +5,6 @@ #include <algorithm> #include <cstddef> #include <cstdlib> -#include <cstring> #include <memory> #include <glad/glad.h> #include "common/assert.h" @@ -164,8 +163,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; - Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, - Memory::FlushMode::Flush); + rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes); constexpr u32 linear_bpp = 4; VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear, @@ -267,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() { } // Initialize sRGB Usage OpenGLState::ClearsRGBUsed(); - rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info); + rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info); } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, |
