diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
16 files changed, 639 insertions, 534 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index b3062e5ba..a4eea61a6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -13,6 +13,11 @@ namespace OpenGL { +CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, + std::size_t alignment, u8* host_ptr) + : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{ + host_ptr} {} + OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) : RasterizerCache{rasterizer}, stream_buffer(size, true) {} @@ -26,11 +31,12 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size // TODO: Figure out which size is the best for given games. cache &= size >= 2048; + const auto& host_ptr{Memory::GetPointer(*cpu_addr)}; if (cache) { - auto entry = TryGet(*cpu_addr); + auto entry = TryGet(host_ptr); if (entry) { - if (entry->size >= size && entry->alignment == alignment) { - return entry->offset; + if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { + return entry->GetOffset(); } Unregister(entry); } @@ -39,17 +45,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size AlignBuffer(alignment); const GLintptr uploaded_offset = buffer_offset; - Memory::ReadBlock(*cpu_addr, buffer_ptr, size); + if (!host_ptr) { + return uploaded_offset; + } + std::memcpy(buffer_ptr, host_ptr, size); buffer_ptr += size; buffer_offset += size; if (cache) { - auto entry = std::make_shared<CachedBufferEntry>(); - entry->offset = uploaded_offset; - entry->size = size; - entry->alignment = alignment; - entry->addr = *cpu_addr; + auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, + alignment, host_ptr); Register(entry); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index c11acfb79..1de1f84ae 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -17,22 +17,39 @@ namespace OpenGL { class RasterizerOpenGL; -struct CachedBufferEntry final : public RasterizerCacheObject { - VAddr GetAddr() const override { - return addr; +class CachedBufferEntry final : public RasterizerCacheObject { +public: + explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, + std::size_t alignment, u8* host_ptr); + + VAddr GetCpuAddr() const override { + return cpu_addr; } std::size_t GetSizeInBytes() const override { return size; } + std::size_t GetSize() const { + return size; + } + + GLintptr GetOffset() const { + return offset; + } + + std::size_t GetAlignment() const { + return alignment; + } + // We do not have to flush this cache as things in it are never modified by us. void Flush() override {} - VAddr addr; - std::size_t size; - GLintptr offset; - std::size_t alignment; +private: + VAddr cpu_addr{}; + std::size_t size{}; + GLintptr offset{}; + std::size_t alignment{}; }; class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index c7f32feaa..a2c509c24 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -15,12 +15,13 @@ namespace OpenGL { -CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} { +CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) + : cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} { buffer.Create(); // Bind and unbind the buffer so it gets allocated by the driver glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); - LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); + LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); } void CachedGlobalRegion::Reload(u32 size_) { @@ -35,7 +36,7 @@ void CachedGlobalRegion::Reload(u32 size_) { // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); - glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); + glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); } GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { @@ -46,19 +47,19 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 return search->second; } -GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { +GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) { GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; if (!region) { // No reserved surface available, create a new one and reserve it - region = std::make_shared<CachedGlobalRegion>(addr, size); + region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr); ReserveGlobalRegion(region); } region->Reload(size); return region; } -void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) { - reserve[region->GetAddr()] = region; +void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { + reserve.insert_or_assign(region->GetCpuAddr(), std::move(region)); } GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) @@ -80,11 +81,12 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( ASSERT(actual_addr); // Look up global region in the cache based on address - GlobalRegion region = TryGet(*actual_addr); + const auto& host_ptr{Memory::GetPointer(*actual_addr)}; + GlobalRegion region{TryGet(host_ptr)}; if (!region) { // No global region found - create a new one - region = GetUncachedGlobalRegion(*actual_addr, size); + region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr); Register(region); } diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 37830bb7c..e497a0619 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h @@ -27,15 +27,13 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; class CachedGlobalRegion final : public RasterizerCacheObject { public: - explicit CachedGlobalRegion(VAddr addr, u32 size); + explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr); - /// Gets the address of the shader in guest memory, required for cache management - VAddr GetAddr() const { - return addr; + VAddr GetCpuAddr() const override { + return cpu_addr; } - /// Gets the size of the shader in guest memory, required for cache management - std::size_t GetSizeInBytes() const { + std::size_t GetSizeInBytes() const override { return size; } @@ -53,9 +51,8 @@ public: } private: - VAddr addr{}; + VAddr cpu_addr{}; u32 size{}; - OGLBuffer buffer; }; @@ -69,8 +66,8 @@ public: private: GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; - GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); - void ReserveGlobalRegion(const GlobalRegion& region); + GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr); + void ReserveGlobalRegion(GlobalRegion region); std::unordered_map<VAddr, GlobalRegion> reserve; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 12d876120..bb6de5477 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -102,8 +102,9 @@ struct FramebufferCacheKey { RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, ScreenInfo& info) - : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info}, - buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { + : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, + emu_window{window}, system{system}, screen_info{info}, + buffer_cache(*this, STREAM_BUFFER_SIZE) { // Create sampler objects for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -118,7 +119,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); - LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); + LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); CheckExtensions(); } @@ -138,7 +139,7 @@ void RasterizerOpenGL::CheckExtensions() { } GLuint RasterizerOpenGL::SetupVertexFormat() { - auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; if (!gpu.dirty_flags.vertex_attrib_format) { @@ -177,7 +178,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { continue; const auto& buffer = regs.vertex_array[attrib.buffer]; - LOG_TRACE(HW_GPU, + LOG_TRACE(Render_OpenGL, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), attrib.offset.Value(), attrib.IsNormalized()); @@ -200,24 +201,24 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { } // Rebinding the VAO invalidates the vertex buffer bindings. - gpu.dirty_flags.vertex_array = 0xFFFFFFFF; + gpu.dirty_flags.vertex_array.set(); state.draw.vertex_array = vao_entry.handle; return vao_entry.handle; } void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { - auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; - if (!gpu.dirty_flags.vertex_array) + if (gpu.dirty_flags.vertex_array.none()) return; MICROPROFILE_SCOPE(OpenGL_VB); // Upload all guest vertex arrays sequentially to our buffer for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (~gpu.dirty_flags.vertex_array & (1u << index)) + if (!gpu.dirty_flags.vertex_array[index]) continue; const auto& vertex_array = regs.vertex_array[index]; @@ -244,11 +245,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { } } - gpu.dirty_flags.vertex_array = 0; + gpu.dirty_flags.vertex_array.reset(); } DrawParameters RasterizerOpenGL::SetupDraw() { - const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + const auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; const bool is_indexed = accelerate_draw == AccelDraw::Indexed; @@ -297,7 +298,7 @@ DrawParameters RasterizerOpenGL::SetupDraw() { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); - auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + auto& gpu = system.GPU().Maxwell3D(); BaseBindings base_bindings; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; @@ -343,9 +344,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { shader_program_manager->UseProgrammableFragmentShader(program_handle); break; default: - LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, - shader_config.enable.Value(), shader_config.offset); - UNREACHABLE(); + UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, + shader_config.enable.Value(), shader_config.offset); } const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); @@ -414,7 +414,7 @@ void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; std::size_t size = 0; for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { @@ -432,7 +432,7 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { } std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; return static_cast<std::size_t>(regs.index_array.count) * static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); @@ -449,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { return boost::make_iterator_range(map.equal_range(interval)); } -void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { +void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { const u64 page_start{addr >> Memory::PAGE_BITS}; const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; @@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, std::optional<std::size_t> single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); - const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, single_color_target}; - if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && - !gpu.dirty_flags.zeta_buffer) { + if (fb_config_state == current_framebuffer_config_state && + gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or // single color targets). This is done because the guest registers may not change but the // host framebuffer may contain different attachments @@ -582,7 +582,7 @@ void RasterizerOpenGL::Clear() { const auto prev_state{state}; SCOPE_EXIT({ prev_state.Apply(); }); - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; bool use_color{}; bool use_depth{}; bool use_stencil{}; @@ -673,7 +673,7 @@ void RasterizerOpenGL::DrawArrays() { return; MICROPROFILE_SCOPE(OpenGL_Drawing); - auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; ConfigureFramebuffers(state); @@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() { // Add space for at least 18 constant buffers buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); - bool invalidate = buffer_cache.Map(buffer_size); + const bool invalidate = buffer_cache.Map(buffer_size); if (invalidate) { // As all cached buffers are invalidated, we need to recheck their state. - gpu.dirty_flags.vertex_array = 0xFFFFFFFF; + gpu.dirty_flags.vertex_array.set(); } const GLuint vao = SetupVertexFormat(); @@ -738,33 +738,21 @@ void RasterizerOpenGL::DrawArrays() { shader_program_manager->ApplyTo(state); state.Apply(); - // Execute draw call + res_cache.SignalPreDrawCall(); params.DispatchDraw(); - - // Disable scissor test - state.viewports[0].scissor.enabled = false; + res_cache.SignalPostDrawCall(); accelerate_draw = AccelDraw::Disabled; - - // Unbind textures for potential future use as framebuffer attachments - for (auto& texture_unit : state.texture_units) { - texture_unit.Unbind(); - } - state.Apply(); } void RasterizerOpenGL::FlushAll() {} -void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - - if (Settings::values.use_accurate_gpu_emulation) { - // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit - res_cache.FlushRegion(addr, size); - } + res_cache.FlushRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); @@ -772,15 +760,15 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { buffer_cache.InvalidateRegion(addr, size); } -void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); } bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect) { + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect) { MICROPROFILE_SCOPE(OpenGL_Blits); res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); return true; @@ -794,7 +782,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, MICROPROFILE_SCOPE(OpenGL_CacheManagement); - const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; + const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))}; if (!surface) { return {}; } @@ -805,7 +793,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); - ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); + + if (params.pixel_format != pixel_format) { + LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); + } screen_info.display_texture = surface->Texture().handle; @@ -814,104 +805,87 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, void RasterizerOpenGL::SamplerInfo::Create() { sampler.Create(); - mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear; - wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap; - uses_depth_compare = false; + mag_filter = Tegra::Texture::TextureFilter::Linear; + min_filter = Tegra::Texture::TextureFilter::Linear; + wrap_u = Tegra::Texture::WrapMode::Wrap; + wrap_v = Tegra::Texture::WrapMode::Wrap; + wrap_p = Tegra::Texture::WrapMode::Wrap; + use_depth_compare = false; depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; - // default is GL_LINEAR_MIPMAP_LINEAR + // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - // Other attributes have correct defaults glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); + + // Other attributes have correct defaults } void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { - const GLuint s = sampler.handle; + const GLuint sampler_id = sampler.handle; if (mag_filter != config.mag_filter) { mag_filter = config.mag_filter; glSamplerParameteri( - s, GL_TEXTURE_MAG_FILTER, + sampler_id, GL_TEXTURE_MAG_FILTER, MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); } - if (min_filter != config.min_filter || mip_filter != config.mip_filter) { + if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) { min_filter = config.min_filter; - mip_filter = config.mip_filter; - glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, - MaxwellToGL::TextureFilterMode(min_filter, mip_filter)); + mipmap_filter = config.mipmap_filter; + glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, + MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter)); } if (wrap_u != config.wrap_u) { wrap_u = config.wrap_u; - glSamplerParameteri(s, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); } if (wrap_v != config.wrap_v) { wrap_v = config.wrap_v; - glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); } if (wrap_p != config.wrap_p) { wrap_p = config.wrap_p; - glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); } - if (uses_depth_compare != (config.depth_compare_enabled == 1)) { - uses_depth_compare = (config.depth_compare_enabled == 1); - if (uses_depth_compare) { - glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); - } else { - glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_NONE); - } + if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) { + use_depth_compare = enabled; + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, + use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); } if (depth_compare_func != config.depth_compare_func) { depth_compare_func = config.depth_compare_func; - glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC, + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, MaxwellToGL::DepthCompareFunc(depth_compare_func)); } - GLvec4 new_border_color; - if (config.srgb_conversion) { - new_border_color[0] = config.srgb_border_color_r / 255.0f; - new_border_color[1] = config.srgb_border_color_g / 255.0f; - new_border_color[2] = config.srgb_border_color_g / 255.0f; - } else { - new_border_color[0] = config.border_color_r; - new_border_color[1] = config.border_color_g; - new_border_color[2] = config.border_color_b; - } - new_border_color[3] = config.border_color_a; - - if (border_color != new_border_color) { + if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) { border_color = new_border_color; - glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data()); + glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data()); } - const float anisotropic_max = static_cast<float>(1 << config.max_anisotropy.Value()); - if (anisotropic_max != max_anisotropic) { - max_anisotropic = anisotropic_max; + if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) { + max_anisotropic = anisotropic; if (GLAD_GL_ARB_texture_filter_anisotropic) { - glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); } else if (GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); } } - const float lod_min = static_cast<float>(config.min_lod_clamp.Value()) / 256.0f; - if (lod_min != min_lod) { - min_lod = lod_min; - glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, min_lod); - } - const float lod_max = static_cast<float>(config.max_lod_clamp.Value()) / 256.0f; - if (lod_max != max_lod) { - max_lod = lod_max; - glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, max_lod); + if (const float min = config.GetMinLod(); min_lod != min) { + min_lod = min; + glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod); } - const u32 bias = config.mip_lod_bias.Value(); - // Sign extend the 13-bit value. - constexpr u32 mask = 1U << (13 - 1); - const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f; - if (lod_bias != bias_lod) { - lod_bias = bias_lod; - glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias); + if (const float max = config.GetMaxLod(); max_lod != max) { + max_lod = max; + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod); + } + + if (const float bias = config.GetLodBias(); lod_bias != bias) { + lod_bias = bias; + glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias); } } @@ -919,7 +893,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader const Shader& shader, GLuint program_handle, BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_UBO); - const auto& gpu = Core::System::GetInstance().GPU(); + const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; const auto& entries = shader->GetShaderEntries().const_buffers; @@ -951,8 +925,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader size = buffer.size; if (size > MaxConstbufferSize) { - LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, - MaxConstbufferSize); + LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, + MaxConstbufferSize); size = MaxConstbufferSize; } } else { @@ -998,7 +972,7 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, GLuint program_handle, BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); - const auto& gpu = Core::System::GetInstance().GPU(); + const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); const auto& entries = shader->GetShaderEntries().samplers; @@ -1012,10 +986,9 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); - Surface surface = res_cache.GetTextureSurface(texture, entry); - if (surface != nullptr) { + if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { state.texture_units[current_bindpoint].texture = - entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; + surface->Texture(entry.IsArray()).handle; surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, texture.tic.w_source); } else { @@ -1026,7 +999,7 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s } void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; const bool geometry_shaders_enabled = regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); const std::size_t viewport_count = @@ -1034,7 +1007,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { for (std::size_t i = 0; i < viewport_count; i++) { auto& viewport = current_state.viewports[i]; const auto& src = regs.viewports[i]; - const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; + const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; viewport.x = viewport_rect.left; viewport.y = viewport_rect.bottom; viewport.width = viewport_rect.GetWidth(); @@ -1049,7 +1022,7 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { void RasterizerOpenGL::SyncClipEnabled( const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, @@ -1066,7 +1039,7 @@ void RasterizerOpenGL::SyncClipCoef() { } void RasterizerOpenGL::SyncCullMode() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.cull.enabled = regs.cull.enabled != 0; @@ -1090,14 +1063,14 @@ void RasterizerOpenGL::SyncCullMode() { } void RasterizerOpenGL::SyncPrimitiveRestart() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.primitive_restart.enabled = regs.primitive_restart.enabled; state.primitive_restart.index = regs.primitive_restart.index; } void RasterizerOpenGL::SyncDepthTestState() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.depth.test_enabled = regs.depth_test_enable != 0; state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; @@ -1109,7 +1082,7 @@ void RasterizerOpenGL::SyncDepthTestState() { } void RasterizerOpenGL::SyncStencilTestState() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.stencil.test_enabled = regs.stencil_enable != 0; if (!regs.stencil_enable) { @@ -1143,7 +1116,7 @@ void RasterizerOpenGL::SyncStencilTestState() { } void RasterizerOpenGL::SyncColorMask() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t count = regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; for (std::size_t i = 0; i < count; i++) { @@ -1157,18 +1130,18 @@ void RasterizerOpenGL::SyncColorMask() { } void RasterizerOpenGL::SyncMultiSampleState() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0; state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0; } void RasterizerOpenGL::SyncFragmentColorClampState() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0; } void RasterizerOpenGL::SyncBlendState() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.blend_color.red = regs.blend_color.r; state.blend_color.green = regs.blend_color.g; @@ -1210,7 +1183,7 @@ void RasterizerOpenGL::SyncBlendState() { } void RasterizerOpenGL::SyncLogicOpState() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.logic_op.enabled = regs.logic_op.enable != 0; @@ -1224,7 +1197,7 @@ void RasterizerOpenGL::SyncLogicOpState() { } void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; const bool geometry_shaders_enabled = regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); const std::size_t viewport_count = @@ -1246,21 +1219,17 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { } void RasterizerOpenGL::SyncTransformFeedback() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - - if (regs.tfb_enabled != 0) { - LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented"); - UNREACHABLE(); - } + const auto& regs = system.GPU().Maxwell3D().regs; + UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented"); } void RasterizerOpenGL::SyncPointState() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.point.size = regs.point_size; } void RasterizerOpenGL::SyncPolygonOffset() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& regs = system.GPU().Maxwell3D().regs; state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; @@ -1270,13 +1239,9 @@ void RasterizerOpenGL::SyncPolygonOffset() { } void RasterizerOpenGL::CheckAlphaTests() { - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - - if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { - LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, " - "this behavior is undefined."); - UNREACHABLE(); - } + const auto& regs = system.GPU().Maxwell3D().regs; + UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, + "Alpha Testing is enabled with more than one rendertarget"); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 258d62259..30f3e8acb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -57,17 +57,17 @@ public: void DrawArrays() override; void Clear() override; void FlushAll() override; - void FlushRegion(VAddr addr, u64 size) override; - void InvalidateRegion(VAddr addr, u64 size) override; - void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + void FlushRegion(CacheAddr addr, u64 size) override; + void InvalidateRegion(CacheAddr addr, u64 size) override; + void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect) override; + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; bool AccelerateDrawBatch(bool is_indexed) override; - void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; + void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; @@ -94,11 +94,12 @@ private: private: Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; - Tegra::Texture::TextureMipmapFilter mip_filter = Tegra::Texture::TextureMipmapFilter::None; + Tegra::Texture::TextureMipmapFilter mipmap_filter = + Tegra::Texture::TextureMipmapFilter::None; Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; - bool uses_depth_compare = false; + bool use_depth_compare = false; Tegra::Texture::DepthCompareFunc depth_compare_func = Tegra::Texture::DepthCompareFunc::Always; GLvec4 border_color = {}; @@ -214,6 +215,7 @@ private: GlobalRegionCacheOpenGL global_cache; Core::Frontend::EmuWindow& emu_window; + Core::System& system; ScreenInfo& screen_info; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 59f671048..451de00e8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <optional> #include <glad/glad.h> #include "common/alignment.h" @@ -20,7 +21,7 @@ #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/utils.h" #include "video_core/surface.h" -#include "video_core/textures/astc.h" +#include "video_core/textures/convert.h" #include "video_core/textures/decoders.h" namespace OpenGL { @@ -60,6 +61,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { addr = cpu_addr ? *cpu_addr : 0; gpu_addr = gpu_addr_; + host_ptr = Memory::GetPointer(addr); size_in_bytes = SizeInBytesRaw(); if (IsPixelFormatASTC(pixel_format)) { @@ -399,7 +401,28 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType return format; } -MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { +/// Returns the discrepant array target +constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) { + switch (target) { + case SurfaceTarget::Texture1D: + return GL_TEXTURE_1D_ARRAY; + case SurfaceTarget::Texture2D: + return GL_TEXTURE_2D_ARRAY; + case SurfaceTarget::Texture3D: + return GL_NONE; + case SurfaceTarget::Texture1DArray: + return GL_TEXTURE_1D; + case SurfaceTarget::Texture2DArray: + return GL_TEXTURE_2D; + case SurfaceTarget::TextureCubemap: + return GL_TEXTURE_CUBE_MAP_ARRAY; + case SurfaceTarget::TextureCubeArray: + return GL_TEXTURE_CUBE_MAP; + } + return GL_NONE; +} + +Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; if (IsPixelFormatASTC(pixel_format)) { // ASTC formats must stop at the ATSC block size boundary @@ -423,8 +446,8 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, for (u32 i = 0; i < params.depth; i++) { MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), params.MipBlockHeight(mip_level), params.MipHeight(mip_level), - params.MipBlockDepth(mip_level), params.tile_width_spacing, 1, - gl_buffer.data() + offset_gl, gl_size, params.addr + offset); + params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, + gl_buffer.data() + offset_gl, params.addr + offset); offset += layer_size; offset_gl += gl_size; } @@ -433,7 +456,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), params.MipBlockHeight(mip_level), params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, - gl_buffer.data(), gl_buffer.size(), params.addr + offset); + gl_buffer.data(), params.addr + offset); } } @@ -541,14 +564,16 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac } CachedSurface::CachedSurface(const SurfaceParams& params) - : params(params), gl_target(SurfaceTargetToGL(params.target)), - cached_size_in_bytes(params.size_in_bytes) { + : params{params}, gl_target{SurfaceTargetToGL(params.target)}, + cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} { texture.Create(gl_target); // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0) // alternatives. This signals a bug on those functions. const auto width = static_cast<GLsizei>(params.MipWidth(0)); const auto height = static_cast<GLsizei>(params.MipHeight(0)); + memory_size = params.MemorySize(); + reinterpreted = false; const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); gl_internal_format = format_tuple.internal_format; @@ -594,103 +619,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params) } } -static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) { - union S8Z24 { - BitField<0, 24, u32> z24; - BitField<24, 8, u32> s8; - }; - static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); - - union Z24S8 { - BitField<0, 8, u32> s8; - BitField<8, 24, u32> z24; - }; - static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); - - S8Z24 s8z24_pixel{}; - Z24S8 z24s8_pixel{}; - constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)}; - for (std::size_t y = 0; y < height; ++y) { - for (std::size_t x = 0; x < width; ++x) { - const std::size_t offset{bpp * (y * width + x)}; - if (reverse) { - std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); - s8z24_pixel.s8.Assign(z24s8_pixel.s8); - s8z24_pixel.z24.Assign(z24s8_pixel.z24); - std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); - } else { - std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); - z24s8_pixel.s8.Assign(s8z24_pixel.s8); - z24s8_pixel.z24.Assign(s8z24_pixel.z24); - std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); - } - } - } -} - -/** - * Helper function to perform software conversion (as needed) when loading a buffer from Switch - * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with - * typical desktop GPUs. - */ -static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, - u32 width, u32 height, u32 depth) { - switch (pixel_format) { - case PixelFormat::ASTC_2D_4X4: - case PixelFormat::ASTC_2D_8X8: - case PixelFormat::ASTC_2D_8X5: - case PixelFormat::ASTC_2D_5X4: - case PixelFormat::ASTC_2D_5X5: - case PixelFormat::ASTC_2D_4X4_SRGB: - case PixelFormat::ASTC_2D_8X8_SRGB: - case PixelFormat::ASTC_2D_8X5_SRGB: - case PixelFormat::ASTC_2D_5X4_SRGB: - case PixelFormat::ASTC_2D_5X5_SRGB: - case PixelFormat::ASTC_2D_10X8: - case PixelFormat::ASTC_2D_10X8_SRGB: { - // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. - u32 block_width{}; - u32 block_height{}; - std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); - data = - Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); - break; - } - case PixelFormat::S8Z24: - // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. - ConvertS8Z24ToZ24S8(data, width, height, false); - break; - } -} - -/** - * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to - * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or - * with typical desktop GPUs. - */ -static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, - u32 width, u32 height) { - switch (pixel_format) { - case PixelFormat::ASTC_2D_4X4: - case PixelFormat::ASTC_2D_8X8: - case PixelFormat::ASTC_2D_4X4_SRGB: - case PixelFormat::ASTC_2D_8X8_SRGB: - case PixelFormat::ASTC_2D_5X5: - case PixelFormat::ASTC_2D_5X5_SRGB: - case PixelFormat::ASTC_2D_10X8: - case PixelFormat::ASTC_2D_10X8_SRGB: { - LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", - static_cast<u32>(pixel_format)); - UNREACHABLE(); - break; - } - case PixelFormat::S8Z24: - // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24. - ConvertS8Z24ToZ24S8(data, width, height, true); - break; - } -} - MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); void CachedSurface::LoadGLBuffer() { MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); @@ -706,10 +634,9 @@ void CachedSurface::LoadGLBuffer() { const u32 bpp = params.GetFormatBpp() / 8; const u32 copy_size = params.width * bpp; if (params.pitch == copy_size) { - std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr), - params.size_in_bytes_gl); + std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl); } else { - const u8* start = Memory::GetPointer(params.addr); + const u8* start{params.host_ptr}; u8* write_to = gl_buffer[0].data(); for (u32 h = params.height; h > 0; h--) { std::memcpy(write_to, start, copy_size); @@ -719,8 +646,16 @@ void CachedSurface::LoadGLBuffer() { } } for (u32 i = 0; i < params.max_mip_level; i++) { - ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), - params.MipHeight(i), params.MipDepth(i)); + const u32 width = params.MipWidth(i); + const u32 height = params.MipHeight(i); + const u32 depth = params.MipDepth(i); + if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) { + // Reserve size for RGBA8 conversion + constexpr std::size_t rgba_bpp = 4; + gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp)); + } + Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width, + height, depth, true, true); } } @@ -743,10 +678,8 @@ void CachedSurface::FlushGLBuffer() { glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); glPixelStorei(GL_PACK_ROW_LENGTH, 0); - ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, - params.height); - const u8* const texture_src_data = Memory::GetPointer(params.addr); - ASSERT(texture_src_data); + Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, + params.height, params.depth, true, true); if (params.is_tiled) { ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", params.block_width, static_cast<u32>(params.target)); @@ -756,9 +689,9 @@ void CachedSurface::FlushGLBuffer() { const u32 bpp = params.GetFormatBpp() / 8; const u32 copy_size = params.width * bpp; if (params.pitch == copy_size) { - std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes()); + std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes()); } else { - u8* start = Memory::GetPointer(params.addr); + u8* start{params.host_ptr}; const u8* read_to = gl_buffer[0].data(); for (u32 h = params.height; h > 0; h--) { std::memcpy(start, read_to, copy_size); @@ -881,20 +814,22 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); } -void CachedSurface::EnsureTextureView() { - if (texture_view.handle != 0) +void CachedSurface::EnsureTextureDiscrepantView() { + if (discrepant_view.handle != 0) return; - const GLenum target{TargetLayer()}; + const GLenum target{GetArrayDiscrepantTarget(params.target)}; + ASSERT(target != GL_NONE); + const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; constexpr GLuint min_layer = 0; constexpr GLuint min_level = 0; - glGenTextures(1, &texture_view.handle); - glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, + glGenTextures(1, &discrepant_view.handle); + glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level, params.max_mip_level, min_layer, num_layers); - ApplyTextureDefaults(texture_view.handle, params.max_mip_level); - glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, + ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level); + glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, reinterpret_cast<const GLint*>(swizzle.data())); } @@ -920,8 +855,8 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, swizzle = {new_x, new_y, new_z, new_w}; const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); - if (texture_view.handle != 0) { - glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); + if (discrepant_view.handle != 0) { + glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); } } @@ -962,30 +897,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; const auto& regs{gpu.regs}; - if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { - return last_color_buffers[index]; + if (!gpu.dirty_flags.color_buffer[index]) { + return current_color_buffers[index]; } - gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); + gpu.dirty_flags.color_buffer.reset(index); ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); if (index >= regs.rt_control.count) { - return last_color_buffers[index] = {}; + return current_color_buffers[index] = {}; } if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return last_color_buffers[index] = {}; + return current_color_buffers[index] = {}; } const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; - return last_color_buffers[index] = GetSurface(color_params, preserve_contents); + return current_color_buffers[index] = GetSurface(color_params, preserve_contents); } void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { surface->LoadGLBuffer(); surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); surface->MarkAsModified(false, *this); + surface->MarkForReload(false); } Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { @@ -994,21 +930,26 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres } // Look up surface in the cache based on address - Surface surface{TryGet(params.addr)}; + Surface surface{TryGet(params.host_ptr)}; if (surface) { if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { - // Use the cached surface as-is + // Use the cached surface as-is unless it's not synced with memory + if (surface->MustReload()) + LoadSurface(surface); return surface; } else if (preserve_contents) { // If surface parameters changed and we care about keeping the previous data, recreate // the surface from the old one Surface new_surface{RecreateSurface(surface, params)}; - Unregister(surface); + UnregisterSurface(surface); Register(new_surface); + if (new_surface->IsUploaded()) { + RegisterReinterpretSurface(new_surface); + } return new_surface; } else { // Delete the old surface before creating a new one to prevent collisions. - Unregister(surface); + UnregisterSurface(surface); } } @@ -1043,7 +984,7 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, for (u32 layer = 0; layer < dst_params.depth; layer++) { for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); - const Surface& copy = TryGet(sub_address); + const Surface& copy = TryGet(Memory::GetPointer(sub_address)); if (!copy) continue; const auto& src_params{copy->GetSurfaceParams()}; @@ -1062,8 +1003,8 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, } static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect, GLuint read_fb_handle, + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, std::size_t cubemap_face = 0) { @@ -1193,7 +1134,7 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, void RasterizerCacheOpenGL::FermiCopySurface( const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const MathUtil::Rectangle<u32>& src_rect, const MathUtil::Rectangle<u32>& dst_rect) { + const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) { const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); @@ -1220,7 +1161,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, const auto& dst_params{dst_surface->GetSurfaceParams()}; // Flush enough memory for both the source and destination surface - FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize())); + FlushRegion(ToCacheAddr(src_params.host_ptr), + std::max(src_params.MemorySize(), dst_params.MemorySize())); LoadSurface(dst_surface); } @@ -1257,7 +1199,11 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, case SurfaceTarget::TextureCubemap: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubeArray: - FastLayeredCopySurface(old_surface, new_surface); + if (old_params.pixel_format == new_params.pixel_format) + FastLayeredCopySurface(old_surface, new_surface); + else { + AccurateCopySurface(old_surface, new_surface); + } break; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", @@ -1268,8 +1214,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, return new_surface; } -Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { - return TryGet(addr); +Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const { + return TryGet(host_ptr); } void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { @@ -1286,4 +1232,108 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params return {}; } +static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params, + u32 height) { + for (u32 i = 0; i < params.max_mip_level; i++) { + if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) { + return {i}; + } + } + return {}; +} + +static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) { + const std::size_t size = params.LayerMemorySize(); + VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap); + for (u32 i = 0; i < params.depth; i++) { + if (start == addr) { + return {i}; + } + start += size; + } + return {}; +} + +static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface, + const Surface blitted_surface) { + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + const std::size_t src_memory_size = src_params.size_in_bytes; + const std::optional<u32> level = + TryFindBestMipMap(src_memory_size, dst_params, src_params.height); + if (level.has_value()) { + if (src_params.width == dst_params.MipWidthGobAligned(*level) && + src_params.height == dst_params.MipHeight(*level) && + src_params.block_height >= dst_params.MipBlockHeight(*level)) { + const std::optional<u32> slot = + TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level); + if (slot.has_value()) { + glCopyImageSubData(render_surface->Texture().handle, + SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, + blitted_surface->Texture().handle, + SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot, + dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1); + blitted_surface->MarkAsModified(true, cache); + return true; + } + } + } + return false; +} + +static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { + const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize(); + const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize(); + if (bound2 > bound1) + return true; + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + return (dst_params.component_type != src_params.component_type); +} + +static bool IsReinterpretInvalidSecond(const Surface render_surface, + const Surface blitted_surface) { + const auto& dst_params = blitted_surface->GetSurfaceParams(); + const auto& src_params = render_surface->GetSurfaceParams(); + return (dst_params.height > src_params.height && dst_params.width > src_params.width); +} + +bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface, + Surface intersect) { + if (IsReinterpretInvalid(triggering_surface, intersect)) { + UnregisterSurface(intersect); + return false; + } + if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) { + if (IsReinterpretInvalidSecond(triggering_surface, intersect)) { + UnregisterSurface(intersect); + return false; + } + FlushObject(intersect); + FlushObject(triggering_surface); + intersect->MarkForReload(true); + } + return true; +} + +void RasterizerCacheOpenGL::SignalPreDrawCall() { + if (texception && GLAD_GL_ARB_texture_barrier) { + glTextureBarrier(); + } + texception = false; +} + +void RasterizerCacheOpenGL::SignalPostDrawCall() { + for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { + if (current_color_buffers[i] != nullptr) { + Surface intersect = + CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr()); + if (intersect != nullptr) { + PartialReinterpretSurface(current_color_buffers[i], intersect); + texception = true; + } + } + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 89d733c50..b3afad139 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -28,12 +28,13 @@ namespace OpenGL { class CachedSurface; using Surface = std::shared_ptr<CachedSurface>; -using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; +using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>; using SurfaceTarget = VideoCore::Surface::SurfaceTarget; using SurfaceType = VideoCore::Surface::SurfaceType; using PixelFormat = VideoCore::Surface::PixelFormat; using ComponentType = VideoCore::Surface::ComponentType; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct SurfaceParams { enum class SurfaceClass { @@ -71,7 +72,7 @@ struct SurfaceParams { } /// Returns the rectangle corresponding to this surface - MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const; + Common::Rectangle<u32> GetRect(u32 mip_level = 0) const; /// Returns the total size of this surface in bytes, adjusted for compression std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { @@ -140,10 +141,18 @@ struct SurfaceParams { return offset; } + std::size_t GetMipmapSingleSize(u32 mip_level) const { + return InnerMipmapMemorySize(mip_level, false, is_layered); + } + u32 MipWidth(u32 mip_level) const { return std::max(1U, width >> mip_level); } + u32 MipWidthGobAligned(u32 mip_level) const { + return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp()); + } + u32 MipHeight(u32 mip_level) const { return std::max(1U, height >> mip_level); } @@ -288,6 +297,7 @@ struct SurfaceParams { bool srgb_conversion; // Parameters used for caching VAddr addr; + u8* host_ptr; Tegra::GPUVAddr gpu_addr; std::size_t size_in_bytes; std::size_t size_in_bytes_gl; @@ -336,9 +346,9 @@ class RasterizerOpenGL; class CachedSurface final : public RasterizerCacheObject { public: - CachedSurface(const SurfaceParams& params); + explicit CachedSurface(const SurfaceParams& params); - VAddr GetAddr() const override { + VAddr GetCpuAddr() const override { return params.addr; } @@ -346,6 +356,10 @@ public: return cached_size_in_bytes; } + std::size_t GetMemorySize() const { + return memory_size; + } + void Flush() override { FlushGLBuffer(); } @@ -354,31 +368,19 @@ public: return texture; } - const OGLTexture& TextureLayer() { - if (params.is_array) { - return Texture(); + const OGLTexture& Texture(bool as_array) { + if (params.is_array == as_array) { + return texture; + } else { + EnsureTextureDiscrepantView(); + return discrepant_view; } - EnsureTextureView(); - return texture_view; } GLenum Target() const { return gl_target; } - GLenum TargetLayer() const { - using VideoCore::Surface::SurfaceTarget; - switch (params.target) { - case SurfaceTarget::Texture1D: - return GL_TEXTURE_1D_ARRAY; - case SurfaceTarget::Texture2D: - return GL_TEXTURE_2D_ARRAY; - case SurfaceTarget::TextureCubemap: - return GL_TEXTURE_CUBE_MAP_ARRAY; - } - return Target(); - } - const SurfaceParams& GetSurfaceParams() const { return params; } @@ -395,19 +397,42 @@ public: Tegra::Texture::SwizzleSource swizzle_z, Tegra::Texture::SwizzleSource swizzle_w); + void MarkReinterpreted() { + reinterpreted = true; + } + + bool IsReinterpreted() const { + return reinterpreted; + } + + void MarkForReload(bool reload) { + must_reload = reload; + } + + bool MustReload() const { + return must_reload; + } + + bool IsUploaded() const { + return params.identity == SurfaceParams::SurfaceClass::Uploaded; + } + private: void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); - void EnsureTextureView(); + void EnsureTextureDiscrepantView(); OGLTexture texture; - OGLTexture texture_view; + OGLTexture discrepant_view; std::vector<std::vector<u8>> gl_buffer; SurfaceParams params{}; GLenum gl_target{}; GLenum gl_internal_format{}; std::size_t cached_size_in_bytes{}; std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; + std::size_t memory_size; + bool reinterpreted = false; + bool must_reload = false; }; class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { @@ -425,13 +450,16 @@ public: Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); /// Tries to find a framebuffer using on the provided CPU address - Surface TryFindFramebufferSurface(VAddr addr) const; + Surface TryFindFramebufferSurface(const u8* host_ptr) const; /// Copies the contents of one surface to another void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const MathUtil::Rectangle<u32>& src_rect, - const MathUtil::Rectangle<u32>& dst_rect); + const Common::Rectangle<u32>& src_rect, + const Common::Rectangle<u32>& dst_rect); + + void SignalPreDrawCall(); + void SignalPostDrawCall(); private: void LoadSurface(const Surface& surface); @@ -449,6 +477,10 @@ private: /// Tries to get a reserved surface for the specified parameters Surface TryGetReservedSurface(const SurfaceParams& params); + // Partialy reinterpret a surface based on a triggering_surface that collides with it. + // returns true if the reinterpret was successful, false in case it was not. + bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect); + /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); @@ -465,12 +497,50 @@ private: OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; + bool texception = false; + /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one /// using the new format. OGLBuffer copy_pbo; - std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers; + std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers; + std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; Surface last_depth_buffer; + + using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>; + using SurfaceInterval = typename SurfaceIntervalCache::interval_type; + + static auto GetReinterpretInterval(const Surface& object) { + return SurfaceInterval::right_open(object->GetCacheAddr() + 1, + object->GetCacheAddr() + object->GetMemorySize() - 1); + } + + // Reinterpreted surfaces are very fragil as the game may keep rendering into them. + SurfaceIntervalCache reinterpreted_surfaces; + + void RegisterReinterpretSurface(Surface reinterpret_surface) { + auto interval = GetReinterpretInterval(reinterpret_surface); + reinterpreted_surfaces.insert({interval, reinterpret_surface}); + reinterpret_surface->MarkReinterpreted(); + } + + Surface CollideOnReinterpretedSurface(CacheAddr addr) const { + const SurfaceInterval interval{addr}; + for (auto& pair : + boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { + return pair.second; + } + return nullptr; + } + + /// Unregisters an object from the cache + void UnregisterSurface(const Surface& object) { + if (object->IsReinterpreted()) { + auto interval = GetReinterpretInterval(object); + reinterpreted_surfaces.erase(interval); + } + Unregister(object); + } }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4883e4f62..60a04e146 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -42,9 +42,9 @@ VAddr GetShaderAddress(Maxwell::ShaderProgram program) { } /// Gets the shader program code from memory for the specified address -ProgramCode GetShaderCode(VAddr addr) { +ProgramCode GetShaderCode(const u8* host_ptr) { ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); - Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); + std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64)); return program_code; } @@ -214,12 +214,13 @@ std::set<GLenum> GetSupportedFormats() { } // namespace -CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, - ShaderDiskCacheOpenGL& disk_cache, +CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, + Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, - ProgramCode&& program_code, ProgramCode&& program_code_b) - : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, - disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { + ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) + : host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier}, + program_type{program_type}, disk_cache{disk_cache}, + precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} { const std::size_t code_size = CalculateProgramSize(program_code); const std::size_t code_size_b = @@ -243,12 +244,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro disk_cache.SaveRaw(raw); } -CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, - ShaderDiskCacheOpenGL& disk_cache, +CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, + Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, - GLShader::ProgramResult result) - : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, - disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { + GLShader::ProgramResult result, u8* host_ptr) + : guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type}, + disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{ + host_ptr} { code = std::move(result.first); entries = result.second; @@ -271,7 +273,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); } - LabelGLObject(GL_PROGRAM, program->handle, addr); + LabelGLObject(GL_PROGRAM, program->handle, guest_addr); } handle = program->handle; @@ -323,7 +325,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); } - LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name); + LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name); return target_program->handle; }; @@ -489,14 +491,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const VAddr program_addr{GetShaderAddress(program)}; // Look up shader in the cache based on address - Shader shader{TryGet(program_addr)}; + const auto& host_ptr{Memory::GetPointer(program_addr)}; + Shader shader{TryGet(host_ptr)}; if (!shader) { // No shader found - create a new one - ProgramCode program_code = GetShaderCode(program_addr); + const auto& host_ptr{Memory::GetPointer(program_addr)}; + ProgramCode program_code{GetShaderCode(host_ptr)}; ProgramCode program_code_b; if (program == Maxwell::ShaderProgram::VertexA) { - program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)); + program_code_b = GetShaderCode( + Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); } const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); @@ -504,11 +509,11 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { if (found != precompiled_shaders.end()) { shader = std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, - precompiled_programs, found->second); + precompiled_programs, found->second, host_ptr); } else { shader = std::make_shared<CachedShader>( program_addr, unique_identifier, program, disk_cache, precompiled_programs, - std::move(program_code), std::move(program_code_b)); + std::move(program_code), std::move(program_code_b), host_ptr); } Register(shader); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 97eed192f..81fe716b4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; class CachedShader final : public RasterizerCacheObject { public: - explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, - ShaderDiskCacheOpenGL& disk_cache, + explicit CachedShader(VAddr guest_addr, u64 unique_identifier, + Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, - ProgramCode&& program_code, ProgramCode&& program_code_b); + ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); - explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, - ShaderDiskCacheOpenGL& disk_cache, + explicit CachedShader(VAddr guest_addr, u64 unique_identifier, + Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, - GLShader::ProgramResult result); + GLShader::ProgramResult result, u8* host_ptr); - VAddr GetAddr() const override { - return addr; + VAddr GetCpuAddr() const override { + return guest_addr; } std::size_t GetSizeInBytes() const override { @@ -91,7 +91,8 @@ private: ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; - VAddr addr{}; + u8* host_ptr{}; + VAddr guest_addr{}; u64 unique_identifier{}; Maxwell::ShaderProgram program_type{}; ShaderDiskCacheOpenGL& disk_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index db18f4dbe..11d1169f0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -5,7 +5,9 @@ #include <array> #include <string> #include <string_view> +#include <utility> #include <variant> +#include <vector> #include <fmt/format.h> @@ -20,6 +22,7 @@ namespace OpenGL::GLShader { using Tegra::Shader::Attribute; +using Tegra::Shader::AttributeUse; using Tegra::Shader::Header; using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; @@ -288,34 +291,22 @@ private: code.AddNewLine(); } - std::string GetInputFlags(const IpaMode& input_mode) { - const IpaSampleMode sample_mode = input_mode.sampling_mode; - const IpaInterpMode interp_mode = input_mode.interpolation_mode; + std::string GetInputFlags(AttributeUse attribute) { std::string out; - switch (interp_mode) { - case IpaInterpMode::Flat: + switch (attribute) { + case AttributeUse::Constant: out += "flat "; break; - case IpaInterpMode::Linear: + case AttributeUse::ScreenLinear: out += "noperspective "; break; - case IpaInterpMode::Perspective: + case AttributeUse::Perspective: // Default, Smooth break; default: - UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); - } - switch (sample_mode) { - case IpaSampleMode::Centroid: - // It can be implemented with the "centroid " keyword in GLSL - UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid"); - break; - case IpaSampleMode::Default: - // Default, n/a - break; - default: - UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode)); + LOG_CRITICAL(HW_GPU, "Unused attribute being fetched"); + UNREACHABLE(); } return out; } @@ -324,16 +315,11 @@ private: const auto& attributes = ir.GetInputAttributes(); for (const auto element : attributes) { const Attribute::Index index = element.first; - const IpaMode& input_mode = *element.second.begin(); if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { // Skip when it's not a generic attribute continue; } - ASSERT(element.second.size() > 0); - UNIMPLEMENTED_IF_MSG(element.second.size() > 1, - "Multiple input flag modes are not supported in GLSL"); - // TODO(bunnei): Use proper number of elements for these u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); if (stage != ShaderStage::Vertex) { @@ -345,8 +331,14 @@ private: if (stage == ShaderStage::Geometry) { attr = "gs_" + attr + "[]"; } - code.AddLine("layout (location = " + std::to_string(idx) + ") " + - GetInputFlags(input_mode) + "in vec4 " + attr + ';'); + std::string suffix; + if (stage == ShaderStage::Fragment) { + const auto input_mode = + header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION); + suffix = GetInputFlags(input_mode); + } + code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " + + attr + ';'); } if (!attributes.empty()) code.AddNewLine(); @@ -727,7 +719,7 @@ private: } std::string GenerateTexture(Operation operation, const std::string& func, - bool is_extra_int = false) { + const std::vector<std::pair<Type, Node>>& extras) { constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); @@ -748,36 +740,47 @@ private: expr += Visit(operation[i]); const std::size_t next = i + 1; - if (next < count || has_array || has_shadow) + if (next < count) expr += ", "; } if (has_array) { - expr += "float(ftoi(" + Visit(meta->array) + "))"; + expr += ", float(ftoi(" + Visit(meta->array) + "))"; } if (has_shadow) { - if (has_array) - expr += ", "; - expr += Visit(meta->depth_compare); + expr += ", " + Visit(meta->depth_compare); } expr += ')'; - for (const Node extra : meta->extras) { + for (const auto& extra_pair : extras) { + const auto [type, operand] = extra_pair; + if (operand == nullptr) { + continue; + } expr += ", "; - if (is_extra_int) { - if (const auto immediate = std::get_if<ImmediateNode>(extra)) { + + switch (type) { + case Type::Int: + if (const auto immediate = std::get_if<ImmediateNode>(operand)) { // Inline the string as an immediate integer in GLSL (some extra arguments are // required to be constant) expr += std::to_string(static_cast<s32>(immediate->GetValue())); } else { - expr += "ftoi(" + Visit(extra) + ')'; + expr += "ftoi(" + Visit(operand) + ')'; } - } else { - expr += Visit(extra); + break; + case Type::Float: + expr += Visit(operand); + break; + default: { + const auto type_int = static_cast<u32>(type); + UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); + expr += '0'; + break; + } } } - expr += ')'; - return expr; + return expr + ')'; } std::string Assign(Operation operation) { @@ -1156,7 +1159,7 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture(operation, "texture"); + std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1167,7 +1170,7 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture(operation, "textureLod"); + std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1178,7 +1181,8 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + + const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; + return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + GetSwizzle(meta->element); } @@ -1207,8 +1211,8 @@ private: ASSERT(meta); if (meta->element < 2) { - return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + - GetSwizzle(meta->element) + "))"; + return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + + " * vec2(256))" + GetSwizzle(meta->element) + "))"; } return "0"; } @@ -1234,9 +1238,9 @@ private: else if (next < count) expr += ", "; } - for (std::size_t i = 0; i < meta->extras.size(); ++i) { + if (meta->lod) { expr += ", "; - expr += CastOperand(Visit(meta->extras.at(i)), Type::Int); + expr += CastOperand(Visit(meta->lod), Type::Int); } expr += ')'; @@ -1584,4 +1588,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st return {decompiler.GetResult(), decompiler.GetShaderEntries()}; } -} // namespace OpenGL::GLShader
\ No newline at end of file +} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 81882822b..82fc4d44b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include <cstring> #include <fmt/format.h> #include <lz4.h> diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 04e1db911..7d96649af 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5; layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; -layout (location = 0) in vec4 position; +layout (location = 0) in noperspective vec4 position; layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { vec4 viewport_flip; @@ -172,4 +172,4 @@ void main() { return {out, program.second}; } -} // namespace OpenGL::GLShader
\ No newline at end of file +} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 81af803bc..9419326a3 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -11,7 +11,9 @@ namespace OpenGL { OpenGLState OpenGLState::cur_state; + bool OpenGLState::s_rgb_used; + OpenGLState::OpenGLState() { // These all match default OpenGL values geometry_shaders.enabled = false; @@ -112,7 +114,6 @@ void OpenGLState::ApplyDefaultState() { } void OpenGLState::ApplySRgb() const { - // sRGB if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { if (framebuffer_srgb.enabled) { // Track if sRGB is used @@ -125,23 +126,20 @@ void OpenGLState::ApplySRgb() const { } void OpenGLState::ApplyCulling() const { - // Culling - const bool cull_changed = cull.enabled != cur_state.cull.enabled; - if (cull_changed) { + if (cull.enabled != cur_state.cull.enabled) { if (cull.enabled) { glEnable(GL_CULL_FACE); } else { glDisable(GL_CULL_FACE); } } - if (cull.enabled) { - if (cull_changed || cull.mode != cur_state.cull.mode) { - glCullFace(cull.mode); - } - if (cull_changed || cull.front_face != cur_state.cull.front_face) { - glFrontFace(cull.front_face); - } + if (cull.mode != cur_state.cull.mode) { + glCullFace(cull.mode); + } + + if (cull.front_face != cur_state.cull.front_face) { + glFrontFace(cull.front_face); } } @@ -172,72 +170,63 @@ void OpenGLState::ApplyColorMask() const { } void OpenGLState::ApplyDepth() const { - // Depth test - const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled; - if (depth_test_changed) { + if (depth.test_enabled != cur_state.depth.test_enabled) { if (depth.test_enabled) { glEnable(GL_DEPTH_TEST); } else { glDisable(GL_DEPTH_TEST); } } - if (depth.test_enabled && - (depth_test_changed || depth.test_func != cur_state.depth.test_func)) { + + if (depth.test_func != cur_state.depth.test_func) { glDepthFunc(depth.test_func); } - // Depth mask + if (depth.write_mask != cur_state.depth.write_mask) { glDepthMask(depth.write_mask); } } void OpenGLState::ApplyPrimitiveRestart() const { - const bool primitive_restart_changed = - primitive_restart.enabled != cur_state.primitive_restart.enabled; - if (primitive_restart_changed) { + if (primitive_restart.enabled != cur_state.primitive_restart.enabled) { if (primitive_restart.enabled) { glEnable(GL_PRIMITIVE_RESTART); } else { glDisable(GL_PRIMITIVE_RESTART); } } - if (primitive_restart_changed || - (primitive_restart.enabled && - primitive_restart.index != cur_state.primitive_restart.index)) { + + if (primitive_restart.index != cur_state.primitive_restart.index) { glPrimitiveRestartIndex(primitive_restart.index); } } void OpenGLState::ApplyStencilTest() const { - const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled; - if (stencil_test_changed) { + if (stencil.test_enabled != cur_state.stencil.test_enabled) { if (stencil.test_enabled) { glEnable(GL_STENCIL_TEST); } else { glDisable(GL_STENCIL_TEST); } } - if (stencil.test_enabled) { - auto config_stencil = [stencil_test_changed](GLenum face, const auto& config, - const auto& prev_config) { - if (stencil_test_changed || config.test_func != prev_config.test_func || - config.test_ref != prev_config.test_ref || - config.test_mask != prev_config.test_mask) { - glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); - } - if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail || - config.action_depth_pass != prev_config.action_depth_pass || - config.action_stencil_fail != prev_config.action_stencil_fail) { - glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, - config.action_depth_pass); - } - if (config.write_mask != prev_config.write_mask) { - glStencilMaskSeparate(face, config.write_mask); - } - }; - config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front); - config_stencil(GL_BACK, stencil.back, cur_state.stencil.back); - } + + const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) { + if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref || + config.test_mask != prev_config.test_mask) { + glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); + } + if (config.action_depth_fail != prev_config.action_depth_fail || + config.action_depth_pass != prev_config.action_depth_pass || + config.action_stencil_fail != prev_config.action_stencil_fail) { + glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, + config.action_depth_pass); + } + if (config.write_mask != prev_config.write_mask) { + glStencilMaskSeparate(face, config.write_mask); + } + }; + ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front); + ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); } // Viewport does not affects glClearBuffer so emulate viewport using scissor test void OpenGLState::EmulateViewportWithScissor() { @@ -278,19 +267,18 @@ void OpenGLState::ApplyViewport() const { updated.depth_range_far != current.depth_range_far) { glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); } - const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled; - if (scissor_changed) { + + if (updated.scissor.enabled != current.scissor.enabled) { if (updated.scissor.enabled) { glEnablei(GL_SCISSOR_TEST, i); } else { glDisablei(GL_SCISSOR_TEST, i); } } - if (updated.scissor.enabled && - (scissor_changed || updated.scissor.x != current.scissor.x || - updated.scissor.y != current.scissor.y || - updated.scissor.width != current.scissor.width || - updated.scissor.height != current.scissor.height)) { + + if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || + updated.scissor.width != current.scissor.width || + updated.scissor.height != current.scissor.height) { glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, updated.scissor.height); } @@ -302,22 +290,23 @@ void OpenGLState::ApplyViewport() const { updated.height != current.height) { glViewport(updated.x, updated.y, updated.width, updated.height); } + if (updated.depth_range_near != current.depth_range_near || updated.depth_range_far != current.depth_range_far) { glDepthRange(updated.depth_range_near, updated.depth_range_far); } - const bool scissor_changed = updated.scissor.enabled != current.scissor.enabled; - if (scissor_changed) { + + if (updated.scissor.enabled != current.scissor.enabled) { if (updated.scissor.enabled) { glEnable(GL_SCISSOR_TEST); } else { glDisable(GL_SCISSOR_TEST); } } - if (updated.scissor.enabled && (scissor_changed || updated.scissor.x != current.scissor.x || - updated.scissor.y != current.scissor.y || - updated.scissor.width != current.scissor.width || - updated.scissor.height != current.scissor.height)) { + + if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || + updated.scissor.width != current.scissor.width || + updated.scissor.height != current.scissor.height) { glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width, updated.scissor.height); } @@ -327,8 +316,7 @@ void OpenGLState::ApplyViewport() const { void OpenGLState::ApplyGlobalBlending() const { const Blend& current = cur_state.blend[0]; const Blend& updated = blend[0]; - const bool blend_changed = updated.enabled != current.enabled; - if (blend_changed) { + if (updated.enabled != current.enabled) { if (updated.enabled) { glEnable(GL_BLEND); } else { @@ -338,15 +326,14 @@ void OpenGLState::ApplyGlobalBlending() const { if (!updated.enabled) { return; } - if (blend_changed || updated.src_rgb_func != current.src_rgb_func || + if (updated.src_rgb_func != current.src_rgb_func || updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || updated.dst_a_func != current.dst_a_func) { glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); } - if (blend_changed || updated.rgb_equation != current.rgb_equation || - updated.a_equation != current.a_equation) { + if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); } } @@ -354,26 +341,22 @@ void OpenGLState::ApplyGlobalBlending() const { void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { const Blend& updated = blend[target]; const Blend& current = cur_state.blend[target]; - const bool blend_changed = updated.enabled != current.enabled || force; - if (blend_changed) { + if (updated.enabled != current.enabled || force) { if (updated.enabled) { glEnablei(GL_BLEND, static_cast<GLuint>(target)); } else { glDisablei(GL_BLEND, static_cast<GLuint>(target)); } } - if (!updated.enabled) { - return; - } - if (blend_changed || updated.src_rgb_func != current.src_rgb_func || + + if (updated.src_rgb_func != current.src_rgb_func || updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || updated.dst_a_func != current.dst_a_func) { glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); } - if (blend_changed || updated.rgb_equation != current.rgb_equation || - updated.a_equation != current.a_equation) { + if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, updated.a_equation); } @@ -397,8 +380,7 @@ void OpenGLState::ApplyBlending() const { } void OpenGLState::ApplyLogicOp() const { - const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled; - if (logic_op_changed) { + if (logic_op.enabled != cur_state.logic_op.enabled) { if (logic_op.enabled) { glEnable(GL_COLOR_LOGIC_OP); } else { @@ -406,14 +388,12 @@ void OpenGLState::ApplyLogicOp() const { } } - if (logic_op.enabled && - (logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) { + if (logic_op.operation != cur_state.logic_op.operation) { glLogicOp(logic_op.operation); } } void OpenGLState::ApplyPolygonOffset() const { - const bool fill_enable_changed = polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; const bool line_enable_changed = @@ -448,9 +428,7 @@ void OpenGLState::ApplyPolygonOffset() const { } } - if ((polygon_offset.fill_enable || polygon_offset.line_enable || polygon_offset.point_enable) && - (factor_changed || units_changed || clamp_changed)) { - + if (factor_changed || units_changed || clamp_changed) { if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); } else { @@ -483,7 +461,7 @@ void OpenGLState::ApplyTextures() const { if (has_delta) { glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), - textures.data()); + textures.data() + first); } } @@ -504,7 +482,7 @@ void OpenGLState::ApplySamplers() const { } if (has_delta) { glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), - samplers.data()); + samplers.data() + first); } } @@ -528,9 +506,9 @@ void OpenGLState::ApplyDepthClamp() const { depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { return; } - if (depth_clamp.far_plane != depth_clamp.near_plane) { - UNIMPLEMENTED_MSG("Unimplemented Depth Clamp Separation!"); - } + UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, + "Unimplemented Depth Clamp Separation!"); + if (depth_clamp.far_plane || depth_clamp.near_plane) { glEnable(GL_DEPTH_CLAMP); } else { diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index d40666ac6..b97576309 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -167,9 +167,11 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, Memory::FlushMode::Flush); - VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, - Memory::GetPointer(framebuffer_addr), - gl_framebuffer_data.data(), true); + constexpr u32 linear_bpp = 4; + VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear, + framebuffer.width, framebuffer.height, bytes_per_pixel, + linear_bpp, Memory::GetPointer(framebuffer_addr), + gl_framebuffer_data.data()); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); @@ -244,6 +246,21 @@ void RendererOpenGL::InitOpenGLObjects() { LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); } +void RendererOpenGL::AddTelemetryFields() { + const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; + const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; + const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; + + LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version); + LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); + LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); + + auto& telemetry_session = system.TelemetrySession(); + telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); + telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); + telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); +} + void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; @@ -257,6 +274,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer) { texture.width = framebuffer.width; texture.height = framebuffer.height; + texture.pixel_format = framebuffer.pixel_format; GLint internal_format; switch (framebuffer.pixel_format) { @@ -380,7 +398,8 @@ void RendererOpenGL::CaptureScreenshot() { GLuint renderbuffer; glGenRenderbuffers(1, &renderbuffer); glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); - glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB8, layout.width, layout.height); + glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width, + layout.height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); DrawScreen(layout); @@ -464,17 +483,7 @@ bool RendererOpenGL::Init() { glDebugMessageCallback(DebugHandler, nullptr); } - const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; - const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; - const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; - - LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version); - LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); - LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); - - Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); - Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); - Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); + AddTelemetryFields(); if (!GLAD_GL_VERSION_4_3) { return false; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 7e13e566b..6cbf9d2cb 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -39,7 +39,7 @@ struct TextureInfo { /// Structure used for storing information about the display target for the Switch screen struct ScreenInfo { GLuint display_texture; - const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; + const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; }; @@ -60,6 +60,7 @@ public: private: void InitOpenGLObjects(); + void AddTelemetryFields(); void CreateRasterizer(); void ConfigureFramebufferTexture(TextureInfo& texture, @@ -102,7 +103,7 @@ private: /// Used for transforming the framebuffer orientation Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; - MathUtil::Rectangle<int> framebuffer_crop_rect; + Common::Rectangle<int> framebuffer_crop_rect; }; } // namespace OpenGL |
