diff options
Diffstat (limited to 'src/video_core')
27 files changed, 281 insertions, 127 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 5d6d217bb..54a902f56 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -195,7 +196,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13: case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14: case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: - return StartCBData(method); + return ProcessCBData(argument); case MAXWELL3D_REG_INDEX(cb_bind[0]): return ProcessCBBind(0); case MAXWELL3D_REG_INDEX(cb_bind[1]): @@ -208,6 +209,14 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return ProcessCBBind(4); case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): return DrawArrays(); + case MAXWELL3D_REG_INDEX(small_index): + regs.index_array.count = regs.small_index.count; + regs.index_array.first = regs.small_index.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + return DrawArrays(); + case MAXWELL3D_REG_INDEX(topology_override): + use_topology_override = true; + return; case MAXWELL3D_REG_INDEX(clear_buffers): return ProcessClearBuffers(); case MAXWELL3D_REG_INDEX(query.query_get): @@ -248,14 +257,6 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) } void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { - if (method == cb_data_state.current) { - regs.reg_array[method] = method_argument; - ProcessCBData(method_argument); - return; - } else if (cb_data_state.current != null_cb_data) { - FinishCBData(); - } - // It is an error to write to a register other than the current macro's ARG register before it // has finished execution. if (executing_macro != 0) { @@ -302,7 +303,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13: case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14: case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: - ProcessCBMultiData(method, base_start, amount); + ProcessCBMultiData(base_start, amount); break; default: for (std::size_t i = 0; i < amount; i++) { @@ -360,6 +361,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { } } +void Maxwell3D::ProcessTopologyOverride() { + using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; + using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; + + PrimitiveTopology topology{}; + + switch (regs.topology_override) { + case PrimitiveTopologyOverride::None: + topology = regs.draw.topology; + break; + case PrimitiveTopologyOverride::Points: + topology = PrimitiveTopology::Points; + break; + case PrimitiveTopologyOverride::Lines: + topology = PrimitiveTopology::Lines; + break; + case PrimitiveTopologyOverride::LineStrip: + topology = PrimitiveTopology::LineStrip; + break; + default: + topology = static_cast<PrimitiveTopology>(regs.topology_override); + break; + } + + if (use_topology_override) { + regs.draw.topology.Assign(topology); + } +} + void Maxwell3D::FlushMMEInlineDraw() { LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), regs.vertex_buffer.count); @@ -370,6 +400,8 @@ void Maxwell3D::FlushMMEInlineDraw() { ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, "Illegal combination of instancing parameters"); + ProcessTopologyOverride(); + const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; if (ShouldExecute()) { rasterizer->Draw(is_indexed, true); @@ -529,6 +561,8 @@ void Maxwell3D::DrawArrays() { ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, "Illegal combination of instancing parameters"); + ProcessTopologyOverride(); + if (regs.draw.instance_next) { // Increment the current instance *before* drawing. state.current_instance += 1; @@ -587,46 +621,7 @@ void Maxwell3D::ProcessCBBind(size_t stage_index) { rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size); } -void Maxwell3D::ProcessCBData(u32 value) { - const u32 id = cb_data_state.id; - cb_data_state.buffer[id][cb_data_state.counter] = value; - // Increment the current buffer position. - regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; - cb_data_state.counter++; -} - -void Maxwell3D::StartCBData(u32 method) { - constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data); - cb_data_state.start_pos = regs.const_buffer.cb_pos; - cb_data_state.id = method - first_cb_data; - cb_data_state.current = method; - cb_data_state.counter = 0; - ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]); -} - -void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) { - if (cb_data_state.current != method) { - if (cb_data_state.current != null_cb_data) { - FinishCBData(); - } - constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data); - cb_data_state.start_pos = regs.const_buffer.cb_pos; - cb_data_state.id = method - first_cb_data; - cb_data_state.current = method; - cb_data_state.counter = 0; - } - const std::size_t id = cb_data_state.id; - const std::size_t size = amount; - std::size_t i = 0; - for (; i < size; i++) { - cb_data_state.buffer[id][cb_data_state.counter] = start_base[i]; - cb_data_state.counter++; - } - // Increment the current buffer position. - regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount; -} - -void Maxwell3D::FinishCBData() { +void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) { // Write the input value to the current const buffer at the current position. const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); ASSERT(buffer_address != 0); @@ -634,14 +629,16 @@ void Maxwell3D::FinishCBData() { // Don't allow writing past the end of the buffer. ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size); - const GPUVAddr address{buffer_address + cb_data_state.start_pos}; - const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos; + const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; + const size_t copy_size = amount * sizeof(u32); + memory_manager.WriteBlock(address, start_base, copy_size); - const u32 id = cb_data_state.id; - memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); + // Increment the current buffer position. + regs.const_buffer.cb_pos += static_cast<u32>(copy_size); +} - cb_data_state.id = null_cb_data; - cb_data_state.current = null_cb_data; +void Maxwell3D::ProcessCBData(u32 value) { + ProcessCBMultiData(&value, 1); } Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index dc9df6c8b..357a74c70 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -367,6 +367,22 @@ public: Patches = 0xe, }; + // Constants as from NVC0_3D_UNK1970_D3D + // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598 + enum class PrimitiveTopologyOverride : u32 { + None = 0x0, + Points = 0x1, + Lines = 0x2, + LineStrip = 0x3, + Triangles = 0x4, + TriangleStrip = 0x5, + LinesAdjacency = 0xa, + LineStripAdjacency = 0xb, + TrianglesAdjacency = 0xc, + TriangleStripAdjacency = 0xd, + Patches = 0xe, + }; + enum class IndexFormat : u32 { UnsignedByte = 0x0, UnsignedShort = 0x1, @@ -1200,7 +1216,12 @@ public: } } index_array; - INSERT_PADDING_WORDS_NOINIT(0x7); + union { + BitField<0, 16, u32> first; + BitField<16, 16, u32> count; + } small_index; + + INSERT_PADDING_WORDS_NOINIT(0x6); INSERT_PADDING_WORDS_NOINIT(0x1F); @@ -1244,7 +1265,11 @@ public: BitField<11, 1, u32> depth_clamp_disabled; } view_volume_clip_control; - INSERT_PADDING_WORDS_NOINIT(0x1F); + INSERT_PADDING_WORDS_NOINIT(0xC); + + PrimitiveTopologyOverride topology_override; + + INSERT_PADDING_WORDS_NOINIT(0x12); u32 depth_bounds_enable; @@ -1520,10 +1545,8 @@ private: void ProcessSyncPoint(); /// Handles a write to the CB_DATA[i] register. - void StartCBData(u32 method); void ProcessCBData(u32 value); - void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount); - void FinishCBData(); + void ProcessCBMultiData(const u32* start_base, u32 amount); /// Handles a write to the CB_BIND register. void ProcessCBBind(size_t stage_index); @@ -1531,6 +1554,9 @@ private: /// Handles a write to the VERTEX_END_GL register, triggering a draw. void DrawArrays(); + /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro) + void ProcessTopologyOverride(); + // Handles a instance drawcall from MME void StepInstance(MMEDrawMode expected_mode, u32 count); @@ -1555,20 +1581,10 @@ private: /// Interpreter for the macro codes uploaded to the GPU. std::unique_ptr<MacroEngine> macro_engine; - static constexpr u32 null_cb_data = 0xFFFFFFFF; - struct CBDataState { - static constexpr size_t inline_size = 0x4000; - std::array<std::array<u32, inline_size>, 16> buffer; - u32 current{null_cb_data}; - u32 id{null_cb_data}; - u32 start_pos{}; - u32 counter{}; - }; - CBDataState cb_data_state; - Upload::State upload_state; bool execute_on{true}; + bool use_topology_override{false}; }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -1685,6 +1701,7 @@ ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(primitive_restart, 0x591); ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); ASSERT_REG_POSITION(index_array, 0x5F2); +ASSERT_REG_POSITION(small_index, 0x5F9); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); ASSERT_REG_POSITION(vp_point_size, 0x644); @@ -1694,6 +1711,7 @@ ASSERT_REG_POSITION(cull_face, 0x648); ASSERT_REG_POSITION(pixel_center_integer, 0x649); ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); +ASSERT_REG_POSITION(topology_override, 0x65C); ASSERT_REG_POSITION(depth_bounds_enable, 0x66F); ASSERT_REG_POSITION(logic_op, 0x671); ASSERT_REG_POSITION(clear_buffers, 0x674); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 67388d980..1fc1358bc 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -53,7 +53,6 @@ void MaxwellDMA::Launch() { // TODO(Subv): Perform more research and implement all features of this engine. const LaunchDMA& launch = regs.launch_dma; - ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); ASSERT(regs.dst_params.origin.x == 0); @@ -79,6 +78,7 @@ void MaxwellDMA::Launch() { CopyPitchToBlockLinear(); } } + ReleaseSemaphore(); } void MaxwellDMA::CopyPitchToPitch() { @@ -244,4 +244,22 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); } +void MaxwellDMA::ReleaseSemaphore() { + const auto type = regs.launch_dma.semaphore_type; + const GPUVAddr address = regs.semaphore.address; + switch (type) { + case LaunchDMA::SemaphoreType::NONE: + break; + case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: + memory_manager.Write<u32>(address, regs.semaphore.payload); + break; + case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: + memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload)); + memory_manager.Write<u64>(address + 8, system.GPU().GetTicks()); + break; + default: + UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value())); + } +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index a04514425..2692cac8a 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -224,6 +224,8 @@ private: void FastCopyBlockLinearToPitch(); + void ReleaseSemaphore(); + Core::System& system; MemoryManager& memory_manager; diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 34dc6c596..f80d62c80 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -8,8 +8,6 @@ #include <queue> #include "common/common_types.h" -#include "common/settings.h" -#include "core/core.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index fd3e41434..af05d47d1 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -14,6 +14,7 @@ set(SHADER_FILES convert_d24s8_to_abgr8.frag convert_depth_to_float.frag convert_float_to_depth.frag + convert_s8d24_to_abgr8.frag full_screen_triangle.vert fxaa.frag fxaa.vert diff --git a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag new file mode 100644 index 000000000..c8a1683b8 --- /dev/null +++ b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag @@ -0,0 +1,23 @@ +// Copyright 2022 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform isampler2D stencil_tex; + +layout(location = 0) out vec4 color; + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f)); + uint stencil = uint(textureLod(stencil_tex, coord, 0).r); + + highp uint depth_val = + uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0)); + lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r; + highp uvec4 components = + uvec4((uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu, stencil_val); + color.rgba = vec4(components) / (exp2(8.0) - 1.0); +} diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 392f82eb7..0173b54d8 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -18,7 +18,6 @@ #include "common/assert.h" #include "common/settings.h" -#include "core/core.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index bb204454e..c5f974080 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -5,9 +5,10 @@ #pragma once #include <atomic> +#include <functional> #include <memory> -#include <optional> +#include "common/common_funcs.h" #include "common/common_types.h" #include "core/frontend/emu_window.h" #include "video_core/gpu.h" @@ -28,8 +29,11 @@ struct RendererSettings { Layout::FramebufferLayout screenshot_framebuffer_layout; }; -class RendererBase : NonCopyable { +class RendererBase { public: + YUZU_NON_COPYABLE(RendererBase); + YUZU_NON_MOVEABLE(RendererBase); + explicit RendererBase(Core::Frontend::EmuWindow& window, std::unique_ptr<Core::Frontend::GraphicsContext> context); virtual ~RendererBase(); diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 151290101..293ad7d59 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -31,9 +31,8 @@ bool GLInnerFence::IsSignaled() const { return true; } ASSERT(sync_object.handle != 0); - GLsizei length; GLint sync_status; - glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status); + glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status); return sync_status == GL_SIGNALED; } diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index f8495896c..9e6732abd 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -243,10 +243,6 @@ GraphicsPipeline::GraphicsPipeline( case Settings::ShaderBackend::GLASM: if (!sources[stage].empty()) { assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); - if (in_parallel) { - // Make sure program is built before continuing when building in parallel - glGetString(GL_PROGRAM_ERROR_STRING_NV); - } } break; case Settings::ShaderBackend::SPIRV: @@ -256,20 +252,18 @@ GraphicsPipeline::GraphicsPipeline( break; } } - if (in_parallel && backend != Settings::ShaderBackend::GLASM) { - // Make sure programs have built if we are building shaders in parallel - for (OGLProgram& program : source_programs) { - if (program.handle != 0) { - GLint status{}; - glGetProgramiv(program.handle, GL_LINK_STATUS, &status); - } - } + if (in_parallel) { + std::lock_guard lock{built_mutex}; + built_fence.Create(); + // Flush this context to ensure compilation commands and fence are in the GPU pipe. + glFlush(); + built_condvar.notify_one(); + } else { + is_built = true; } if (shader_notify) { shader_notify->MarkShaderComplete(); } - is_built = true; - built_condvar.notify_one(); }}; if (thread_worker) { thread_worker->QueueWork(std::move(func)); @@ -440,7 +434,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); - if (!is_built.load(std::memory_order::relaxed)) { + if (!IsBuilt()) { WaitForBuild(); } const bool use_assembly{assembly_programs[0].handle != 0}; @@ -585,8 +579,26 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { } void GraphicsPipeline::WaitForBuild() { - std::unique_lock lock{built_mutex}; - built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + if (built_fence.handle == 0) { + std::unique_lock lock{built_mutex}; + built_condvar.wait(lock, [this] { return built_fence.handle != 0; }); + } + ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED); + is_built = true; +} + +bool GraphicsPipeline::IsBuilt() noexcept { + if (is_built) { + return true; + } + if (built_fence.handle == 0) { + return false; + } + // Timeout of zero means this is non-blocking + const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0); + ASSERT(sync_status != GL_WAIT_FAILED); + is_built = sync_status != GL_TIMEOUT_EXPIRED; + return is_built; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 4e28d9a42..311d49f3f 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -100,9 +100,7 @@ public: return writes_global_memory; } - [[nodiscard]] bool IsBuilt() const noexcept { - return is_built.load(std::memory_order::relaxed); - } + [[nodiscard]] bool IsBuilt() noexcept; template <typename Spec> static auto MakeConfigureSpecFunc() { @@ -154,7 +152,8 @@ private: std::mutex built_mutex; std::condition_variable built_condvar; - std::atomic_bool is_built{false}; + OGLSync built_fence{}; + bool is_built{false}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index b2d5bfd3b..84e07f8bd 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -7,12 +7,14 @@ #include <string_view> #include <utility> #include <glad/glad.h> -#include "common/common_types.h" +#include "common/common_funcs.h" namespace OpenGL { -class OGLRenderbuffer : private NonCopyable { +class OGLRenderbuffer final { public: + YUZU_NON_COPYABLE(OGLRenderbuffer); + OGLRenderbuffer() = default; OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -36,8 +38,10 @@ public: GLuint handle = 0; }; -class OGLTexture : private NonCopyable { +class OGLTexture final { public: + YUZU_NON_COPYABLE(OGLTexture); + OGLTexture() = default; OGLTexture(OGLTexture&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -61,8 +65,10 @@ public: GLuint handle = 0; }; -class OGLTextureView : private NonCopyable { +class OGLTextureView final { public: + YUZU_NON_COPYABLE(OGLTextureView); + OGLTextureView() = default; OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -86,8 +92,10 @@ public: GLuint handle = 0; }; -class OGLSampler : private NonCopyable { +class OGLSampler final { public: + YUZU_NON_COPYABLE(OGLSampler); + OGLSampler() = default; OGLSampler(OGLSampler&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -111,8 +119,10 @@ public: GLuint handle = 0; }; -class OGLShader : private NonCopyable { +class OGLShader final { public: + YUZU_NON_COPYABLE(OGLShader); + OGLShader() = default; OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -132,8 +142,10 @@ public: GLuint handle = 0; }; -class OGLProgram : private NonCopyable { +class OGLProgram final { public: + YUZU_NON_COPYABLE(OGLProgram); + OGLProgram() = default; OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -154,8 +166,10 @@ public: GLuint handle = 0; }; -class OGLAssemblyProgram : private NonCopyable { +class OGLAssemblyProgram final { public: + YUZU_NON_COPYABLE(OGLAssemblyProgram); + OGLAssemblyProgram() = default; OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -176,8 +190,10 @@ public: GLuint handle = 0; }; -class OGLPipeline : private NonCopyable { +class OGLPipeline final { public: + YUZU_NON_COPYABLE(OGLPipeline); + OGLPipeline() = default; OGLPipeline(OGLPipeline&& o) noexcept : handle{std::exchange<GLuint>(o.handle, 0)} {} @@ -198,8 +214,10 @@ public: GLuint handle = 0; }; -class OGLBuffer : private NonCopyable { +class OGLBuffer final { public: + YUZU_NON_COPYABLE(OGLBuffer); + OGLBuffer() = default; OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -223,8 +241,10 @@ public: GLuint handle = 0; }; -class OGLSync : private NonCopyable { +class OGLSync final { public: + YUZU_NON_COPYABLE(OGLSync); + OGLSync() = default; OGLSync(OGLSync&& o) noexcept : handle(std::exchange(o.handle, nullptr)) {} @@ -247,8 +267,10 @@ public: GLsync handle = 0; }; -class OGLFramebuffer : private NonCopyable { +class OGLFramebuffer final { public: + YUZU_NON_COPYABLE(OGLFramebuffer); + OGLFramebuffer() = default; OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -272,8 +294,10 @@ public: GLuint handle = 0; }; -class OGLQuery : private NonCopyable { +class OGLQuery final { public: + YUZU_NON_COPYABLE(OGLQuery); + OGLQuery() = default; OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {} diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 5864c7c07..550ed6d36 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -9,7 +9,6 @@ #include <glad/glad.h> #include "common/common_types.h" -#include "core/core.h" #include "video_core/dirty_flags.h" #include "video_core/engines/maxwell_3d.h" diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index daba42ed9..db5bf1d30 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -184,6 +184,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_32_32_32: case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return GL_FLOAT; + case Maxwell::VertexAttribute::Size::Size_11_11_10: + return GL_UNSIGNED_INT_10F_11F_11F_REV; default: break; } diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index 2c3914459..ec03cca38 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -9,6 +9,7 @@ #include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h" #include "video_core/host_shaders/convert_depth_to_float_frag_spv.h" #include "video_core/host_shaders/convert_float_to_depth_frag_spv.h" +#include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h" #include "video_core/host_shaders/full_screen_triangle_vert_spv.h" #include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h" #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" @@ -370,6 +371,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), + convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)), nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) { if (device.IsExtShaderStencilExportSupported()) { @@ -474,6 +476,13 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view); } +void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, + ImageView& src_image_view) { + ConvertPipelineColorTargetEx(convert_s8d24_to_abgr8_pipeline, dst_framebuffer->RenderPass(), + convert_s8d24_to_abgr8_frag); + ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view); +} + void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 85e7dca5b..1a3944179 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -56,6 +56,8 @@ public: void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); + void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view); @@ -99,6 +101,7 @@ private: vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; + vk::ShaderModule convert_s8d24_to_abgr8_frag; vk::Sampler linear_sampler; vk::Sampler nearest_sampler; @@ -112,6 +115,7 @@ private: vk::Pipeline convert_r16_to_d16_pipeline; vk::Pipeline convert_abgr8_to_d24s8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; + vk::Pipeline convert_s8d24_to_abgr8_pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 751e4792b..1c136c410 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -495,6 +495,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return VK_FORMAT_R32G32B32A32_SFLOAT; + case Maxwell::VertexAttribute::Size::Size_11_11_10: + return VK_FORMAT_B10G11R11_UFLOAT_PACK32; default: break; } diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index c71a1f44d..621a6a071 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -100,6 +100,8 @@ VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { return VK_FORMAT_A8B8G8R8_UNORM_PACK32; case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM: return VK_FORMAT_R5G6B5_UNORM_PACK16; + case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM: + return VK_FORMAT_B8G8R8A8_UNORM; default: UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", static_cast<u32>(framebuffer.pixel_format)); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 3e96c0f60..4d73427b4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <cstring> #include <memory> #include <optional> @@ -292,7 +293,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, }; - const std::array push_constants{base_vertex, index_shift}; + const std::array<u32, 2> push_constants{base_vertex, index_shift}; const VkDescriptorSet set = descriptor_allocator.Commit(); device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 3bfdf41ba..7d9d4f7ba 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -140,12 +140,12 @@ bool VKScheduler::UpdateRescaling(bool is_rescaling) { void VKScheduler::WorkerThread(std::stop_token stop_token) { Common::SetCurrentThreadName("yuzu:VulkanWorker"); do { - if (work_queue.empty()) { - wait_cv.notify_all(); - } std::unique_ptr<CommandChunk> work; { std::unique_lock lock{work_mutex}; + if (work_queue.empty()) { + wait_cv.notify_all(); + } work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); }); if (stop_token.stop_requested()) { continue; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 1b06c9296..e69aa136b 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -146,6 +146,7 @@ private: using FuncType = TypedCommand<T>; static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large"); + recorded_counts++; command_offset = Common::AlignUp(command_offset, alignof(FuncType)); if (command_offset > sizeof(data) - sizeof(FuncType)) { return false; @@ -167,7 +168,7 @@ private: } bool Empty() const { - return command_offset == 0; + return recorded_counts == 0; } bool HasSubmit() const { @@ -178,6 +179,7 @@ private: Command* first = nullptr; Command* last = nullptr; + size_t recorded_counts = 0; size_t command_offset = 0; bool submit = false; alignas(std::max_align_t) std::array<u8, 0x8000> data{}; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 40a149832..8240c83e1 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -8,7 +8,6 @@ #include <limits> #include "common/common_types.h" -#include "core/core.h" #include "video_core/dirty_flags.h" #include "video_core/engines/maxwell_3d.h" diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 0ba56ff1e..8101eb42c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -554,10 +554,12 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im }; } -[[nodiscard]] bool IsFormatFlipped(PixelFormat format) { +[[nodiscard]] bool IsFormatFlipped(PixelFormat format, bool emulate_bgr565) { switch (format) { case PixelFormat::A1B5G5R5_UNORM: return true; + case PixelFormat::B5G6R5_UNORM: + return emulate_bgr565; default: return false; } @@ -1068,6 +1070,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); } + if (src_view.format == PixelFormat::D24_UNORM_S8_UINT) { + return blit_image_helper.ConvertS8D24ToABGR8(dst, src_view); + } break; case PixelFormat::R32_FLOAT: if (src_view.format == PixelFormat::D32_FLOAT) { @@ -1488,7 +1493,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI }; if (!info.IsRenderTarget()) { swizzle = info.Swizzle(); - if (IsFormatFlipped(format)) { + if (IsFormatFlipped(format, device->MustEmulateBGR565())) { std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed); } if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) { diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 329bf4def..2f2594585 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -50,6 +50,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor gpu->BindRenderer(std::move(renderer)); return gpu; } catch (const std::runtime_error& exception) { + scope.Cancel(); LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what()); return nullptr; } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 153702c0b..effde73c9 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -39,6 +39,11 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{ VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_UNDEFINED, }; + +constexpr std::array B5G6R5_UNORM_PACK16{ + VK_FORMAT_R5G6B5_UNORM_PACK16, + VK_FORMAT_UNDEFINED, +}; } // namespace Alternatives enum class NvidiaArchitecture { @@ -87,6 +92,8 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data(); case VK_FORMAT_D16_UNORM_S8_UINT: return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data(); + case VK_FORMAT_B5G6R5_UNORM_PACK16: + return Alternatives::B5G6R5_UNORM_PACK16.data(); default: return nullptr; } @@ -224,9 +231,14 @@ std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) { return supported_extensions; } +bool IsExtensionSupported(std::span<const std::string> supported_extensions, + std::string_view extension) { + return std::ranges::find(supported_extensions, extension) != supported_extensions.end(); +} + NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, std::span<const std::string> exts) { - if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) { + if (IsExtensionSupported(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{}; shading_rate_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; @@ -239,7 +251,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, return NvidiaArchitecture::AmpereOrNewer; } } - if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) { + if (IsExtensionSupported(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) { return NvidiaArchitecture::Turing; } return NvidiaArchitecture::VoltaOrOlder; @@ -604,7 +616,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR break; } } - if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) { + const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; + if (ext_extended_dynamic_state && is_radv) { // Mask driver version variant const u32 version = (properties.driverVersion << 3) >> 3; if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) { @@ -613,6 +626,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR ext_extended_dynamic_state = false; } } + if (ext_vertex_input_dynamic_state && is_radv) { + // TODO(ameerj): Blacklist only offending driver versions + // TODO(ameerj): Confirm if RDNA1 is affected + const bool is_rdna2 = + IsExtensionSupported(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME); + if (is_rdna2) { + LOG_WARNING(Render_Vulkan, + "RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware"); + ext_vertex_input_dynamic_state = false; + } + } sets_per_pool = 64; const bool is_amd = @@ -628,7 +652,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR has_broken_cube_compatibility = true; } } - const bool is_amd_or_radv = is_amd || driver_id == VK_DRIVER_ID_MESA_RADV; + const bool is_amd_or_radv = is_amd || is_radv; if (ext_sampler_filter_minmax && is_amd_or_radv) { // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. if (!is_float16_supported) { @@ -639,6 +663,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS; + const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; if (ext_vertex_input_dynamic_state && is_intel_windows) { LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state"); ext_vertex_input_dynamic_state = false; @@ -652,6 +677,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); cant_blit_msaa = true; } + if (is_intel_anv) { + LOG_WARNING(Render_Vulkan, "ANV driver does not support native BGR format"); + must_emulate_bgr565 = true; + } supports_d24_depth = IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 37d140ebd..34b1add16 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -354,6 +354,10 @@ public: return cant_blit_msaa; } + bool MustEmulateBGR565() const { + return must_emulate_bgr565; + } + private: /// Checks if the physical device is suitable. void CheckSuitability(bool requires_swapchain) const; @@ -448,6 +452,7 @@ private: bool has_nsight_graphics{}; ///< Has Nsight Graphics attached bool supports_d24_depth{}; ///< Supports D24 depth buffers. bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. + bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. // Telemetry parameters std::string vendor_name; ///< Device's driver name. |
