From 2b58652f0897053d4da04deb586490220ab5a774 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 27 Jul 2019 19:40:10 -0300 Subject: maxwell_3d: Slow implementation of passed samples (query 21) Implements GL_SAMPLES_PASSED by waiting immediately for queries. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b0eb14c8b..8d132732a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -547,6 +547,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); + const auto& regs = gpu.regs; + samples_passed.UpdateState(regs.samplecnt_enable); + SyncRasterizeEnable(state); SyncColorMask(); SyncFragmentColorClampState(); @@ -709,6 +712,27 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); } +void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { + switch (type) { + case VideoCore::QueryType::SamplesPassed: + samples_passed.Reset(); + break; + default: + UNIMPLEMENTED_MSG("type={}", static_cast(type)); + break; + } +} + +u64 RasterizerOpenGL::Query(VideoCore::QueryType type) { + switch (type) { + case VideoCore::QueryType::SamplesPassed: + return samples_passed.Query(); + default: + UNIMPLEMENTED_MSG("type={}", static_cast(type)); + return 1; + } +} + void RasterizerOpenGL::FlushAll() {} void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { -- cgit v1.2.3 From fe1238be7a14b98c1698b5f8398b0efe83ade43a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 26 Nov 2019 18:30:21 -0300 Subject: gl_rasterizer: Add queued commands counter Keep track of the queued OpenGL commands that can signal a fence if waited on. As a side effect, we avoid calls to glFlush when no commands are queued. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8d132732a..652db705b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -541,6 +541,8 @@ void RasterizerOpenGL::Clear() { } else if (use_stencil) { glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); } + + ++num_queued_commands; } void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { @@ -641,6 +643,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { glTextureBarrier(); } + ++num_queued_commands; + const GLuint base_instance = static_cast(gpu.regs.vb_base_instance); const GLsizei num_instances = static_cast(is_instanced ? gpu.mme_draw.instance_count : 1); @@ -710,6 +714,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { state.ApplyProgramPipeline(); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); + ++num_queued_commands; } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -762,10 +767,18 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { } void RasterizerOpenGL::FlushCommands() { + // Only flush when we have commands queued to OpenGL. + if (num_queued_commands == 0) { + return; + } + num_queued_commands = 0; glFlush(); } void RasterizerOpenGL::TickFrame() { + // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. + num_queued_commands = 0; + buffer_cache.TickFrame(); } -- cgit v1.2.3 From aae8c180cbbf91ba12f53c37e81a97d4b3cc4ccd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 26 Nov 2019 18:52:15 -0300 Subject: gl_query_cache: Implement host queries using a deferred cache Instead of waiting immediately for executed commands, defer the query until the guest CPU reads it. This way we get closer to what the guest program is doing. To archive this we have to build a dependency queue, because host APIs (like OpenGL and Vulkan) use ranged queries instead of counters like NVN. Waiting for queries implicitly uses fences and this requires a command being queued, otherwise the driver will lock waiting until a timeout. To fix this when there are no commands queued, we explicitly call glFlush. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 30 ++++++++---------------- 1 file changed, 10 insertions(+), 20 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 652db705b..827f85884 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -25,6 +25,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" +#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, - shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, - buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { + shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, + screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { shader_program_manager = std::make_unique(); state.draw.shader_program = 0; state.Apply(); @@ -548,9 +549,9 @@ void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; - samples_passed.UpdateState(regs.samplecnt_enable); + + query_cache.UpdateCounters(); SyncRasterizeEnable(state); SyncColorMask(); @@ -718,24 +719,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { - switch (type) { - case VideoCore::QueryType::SamplesPassed: - samples_passed.Reset(); - break; - default: - UNIMPLEMENTED_MSG("type={}", static_cast(type)); - break; - } + query_cache.ResetCounter(type); } -u64 RasterizerOpenGL::Query(VideoCore::QueryType type) { - switch (type) { - case VideoCore::QueryType::SamplesPassed: - return samples_passed.Query(); - default: - UNIMPLEMENTED_MSG("type={}", static_cast(type)); - return 1; - } +void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type) { + query_cache.Query(gpu_addr, type); } void RasterizerOpenGL::FlushAll() {} @@ -747,6 +735,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { } texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); + query_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -757,6 +746,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { texture_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); + query_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { -- cgit v1.2.3 From 73d2d3342dc8867d32f08f89b2ca36ff071598dc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 28 Nov 2019 02:15:34 -0300 Subject: gl_query_cache: Optimize query cache Use a custom cache instead of relying on a ranged cache. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 827f85884..4bdc8db85 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -722,8 +722,9 @@ void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { query_cache.ResetCounter(type); } -void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type) { - query_cache.Query(gpu_addr, type); +void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, + std::optional timestamp) { + query_cache.Query(gpu_addr, type, timestamp); } void RasterizerOpenGL::FlushAll() {} -- cgit v1.2.3