aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/maxwell_3d.cpp109
-rw-r--r--src/video_core/engines/maxwell_3d.h50
-rw-r--r--src/video_core/engines/maxwell_dma.cpp20
-rw-r--r--src/video_core/engines/maxwell_dma.h2
-rw-r--r--src/video_core/fence_manager.h2
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt1
-rw-r--r--src/video_core/host_shaders/convert_s8d24_to_abgr8.frag23
-rw-r--r--src/video_core/query_cache.h1
-rw-r--r--src/video_core/renderer_base.h8
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp46
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h7
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h50
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h1
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h2
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp9
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h4
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp9
-rw-r--r--src/video_core/video_core.cpp1
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp37
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h5
27 files changed, 281 insertions, 127 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5d6d217bb..54a902f56 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -7,6 +7,7 @@
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
@@ -195,7 +196,7 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
- return StartCBData(method);
+ return ProcessCBData(argument);
case MAXWELL3D_REG_INDEX(cb_bind[0]):
return ProcessCBBind(0);
case MAXWELL3D_REG_INDEX(cb_bind[1]):
@@ -208,6 +209,14 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return ProcessCBBind(4);
case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
return DrawArrays();
+ case MAXWELL3D_REG_INDEX(small_index):
+ regs.index_array.count = regs.small_index.count;
+ regs.index_array.first = regs.small_index.first;
+ dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
+ return DrawArrays();
+ case MAXWELL3D_REG_INDEX(topology_override):
+ use_topology_override = true;
+ return;
case MAXWELL3D_REG_INDEX(clear_buffers):
return ProcessClearBuffers();
case MAXWELL3D_REG_INDEX(query.query_get):
@@ -248,14 +257,6 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
}
void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
- if (method == cb_data_state.current) {
- regs.reg_array[method] = method_argument;
- ProcessCBData(method_argument);
- return;
- } else if (cb_data_state.current != null_cb_data) {
- FinishCBData();
- }
-
// It is an error to write to a register other than the current macro's ARG register before it
// has finished execution.
if (executing_macro != 0) {
@@ -302,7 +303,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 13:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 14:
case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
- ProcessCBMultiData(method, base_start, amount);
+ ProcessCBMultiData(base_start, amount);
break;
default:
for (std::size_t i = 0; i < amount; i++) {
@@ -360,6 +361,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
}
}
+void Maxwell3D::ProcessTopologyOverride() {
+ using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology;
+ using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride;
+
+ PrimitiveTopology topology{};
+
+ switch (regs.topology_override) {
+ case PrimitiveTopologyOverride::None:
+ topology = regs.draw.topology;
+ break;
+ case PrimitiveTopologyOverride::Points:
+ topology = PrimitiveTopology::Points;
+ break;
+ case PrimitiveTopologyOverride::Lines:
+ topology = PrimitiveTopology::Lines;
+ break;
+ case PrimitiveTopologyOverride::LineStrip:
+ topology = PrimitiveTopology::LineStrip;
+ break;
+ default:
+ topology = static_cast<PrimitiveTopology>(regs.topology_override);
+ break;
+ }
+
+ if (use_topology_override) {
+ regs.draw.topology.Assign(topology);
+ }
+}
+
void Maxwell3D::FlushMMEInlineDraw() {
LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
@@ -370,6 +400,8 @@ void Maxwell3D::FlushMMEInlineDraw() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
+ ProcessTopologyOverride();
+
const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
if (ShouldExecute()) {
rasterizer->Draw(is_indexed, true);
@@ -529,6 +561,8 @@ void Maxwell3D::DrawArrays() {
ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
"Illegal combination of instancing parameters");
+ ProcessTopologyOverride();
+
if (regs.draw.instance_next) {
// Increment the current instance *before* drawing.
state.current_instance += 1;
@@ -587,46 +621,7 @@ void Maxwell3D::ProcessCBBind(size_t stage_index) {
rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
}
-void Maxwell3D::ProcessCBData(u32 value) {
- const u32 id = cb_data_state.id;
- cb_data_state.buffer[id][cb_data_state.counter] = value;
- // Increment the current buffer position.
- regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
- cb_data_state.counter++;
-}
-
-void Maxwell3D::StartCBData(u32 method) {
- constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
- cb_data_state.start_pos = regs.const_buffer.cb_pos;
- cb_data_state.id = method - first_cb_data;
- cb_data_state.current = method;
- cb_data_state.counter = 0;
- ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
-}
-
-void Maxwell3D::ProcessCBMultiData(u32 method, const u32* start_base, u32 amount) {
- if (cb_data_state.current != method) {
- if (cb_data_state.current != null_cb_data) {
- FinishCBData();
- }
- constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data);
- cb_data_state.start_pos = regs.const_buffer.cb_pos;
- cb_data_state.id = method - first_cb_data;
- cb_data_state.current = method;
- cb_data_state.counter = 0;
- }
- const std::size_t id = cb_data_state.id;
- const std::size_t size = amount;
- std::size_t i = 0;
- for (; i < size; i++) {
- cb_data_state.buffer[id][cb_data_state.counter] = start_base[i];
- cb_data_state.counter++;
- }
- // Increment the current buffer position.
- regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4 * amount;
-}
-
-void Maxwell3D::FinishCBData() {
+void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
// Write the input value to the current const buffer at the current position.
const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
ASSERT(buffer_address != 0);
@@ -634,14 +629,16 @@ void Maxwell3D::FinishCBData() {
// Don't allow writing past the end of the buffer.
ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
- const GPUVAddr address{buffer_address + cb_data_state.start_pos};
- const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
+ const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+ const size_t copy_size = amount * sizeof(u32);
+ memory_manager.WriteBlock(address, start_base, copy_size);
- const u32 id = cb_data_state.id;
- memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
+ // Increment the current buffer position.
+ regs.const_buffer.cb_pos += static_cast<u32>(copy_size);
+}
- cb_data_state.id = null_cb_data;
- cb_data_state.current = null_cb_data;
+void Maxwell3D::ProcessCBData(u32 value) {
+ ProcessCBMultiData(&value, 1);
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index dc9df6c8b..357a74c70 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -367,6 +367,22 @@ public:
Patches = 0xe,
};
+ // Constants as from NVC0_3D_UNK1970_D3D
+ // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598
+ enum class PrimitiveTopologyOverride : u32 {
+ None = 0x0,
+ Points = 0x1,
+ Lines = 0x2,
+ LineStrip = 0x3,
+ Triangles = 0x4,
+ TriangleStrip = 0x5,
+ LinesAdjacency = 0xa,
+ LineStripAdjacency = 0xb,
+ TrianglesAdjacency = 0xc,
+ TriangleStripAdjacency = 0xd,
+ Patches = 0xe,
+ };
+
enum class IndexFormat : u32 {
UnsignedByte = 0x0,
UnsignedShort = 0x1,
@@ -1200,7 +1216,12 @@ public:
}
} index_array;
- INSERT_PADDING_WORDS_NOINIT(0x7);
+ union {
+ BitField<0, 16, u32> first;
+ BitField<16, 16, u32> count;
+ } small_index;
+
+ INSERT_PADDING_WORDS_NOINIT(0x6);
INSERT_PADDING_WORDS_NOINIT(0x1F);
@@ -1244,7 +1265,11 @@ public:
BitField<11, 1, u32> depth_clamp_disabled;
} view_volume_clip_control;
- INSERT_PADDING_WORDS_NOINIT(0x1F);
+ INSERT_PADDING_WORDS_NOINIT(0xC);
+
+ PrimitiveTopologyOverride topology_override;
+
+ INSERT_PADDING_WORDS_NOINIT(0x12);
u32 depth_bounds_enable;
@@ -1520,10 +1545,8 @@ private:
void ProcessSyncPoint();
/// Handles a write to the CB_DATA[i] register.
- void StartCBData(u32 method);
void ProcessCBData(u32 value);
- void ProcessCBMultiData(u32 method, const u32* start_base, u32 amount);
- void FinishCBData();
+ void ProcessCBMultiData(const u32* start_base, u32 amount);
/// Handles a write to the CB_BIND register.
void ProcessCBBind(size_t stage_index);
@@ -1531,6 +1554,9 @@ private:
/// Handles a write to the VERTEX_END_GL register, triggering a draw.
void DrawArrays();
+ /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro)
+ void ProcessTopologyOverride();
+
// Handles a instance drawcall from MME
void StepInstance(MMEDrawMode expected_mode, u32 count);
@@ -1555,20 +1581,10 @@ private:
/// Interpreter for the macro codes uploaded to the GPU.
std::unique_ptr<MacroEngine> macro_engine;
- static constexpr u32 null_cb_data = 0xFFFFFFFF;
- struct CBDataState {
- static constexpr size_t inline_size = 0x4000;
- std::array<std::array<u32, inline_size>, 16> buffer;
- u32 current{null_cb_data};
- u32 id{null_cb_data};
- u32 start_pos{};
- u32 counter{};
- };
- CBDataState cb_data_state;
-
Upload::State upload_state;
bool execute_on{true};
+ bool use_topology_override{false};
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -1685,6 +1701,7 @@ ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1);
ASSERT_REG_POSITION(index_array, 0x5F2);
+ASSERT_REG_POSITION(small_index, 0x5F9);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(vp_point_size, 0x644);
@@ -1694,6 +1711,7 @@ ASSERT_REG_POSITION(cull_face, 0x648);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
ASSERT_REG_POSITION(view_volume_clip_control, 0x64F);
+ASSERT_REG_POSITION(topology_override, 0x65C);
ASSERT_REG_POSITION(depth_bounds_enable, 0x66F);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 67388d980..1fc1358bc 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -53,7 +53,6 @@ void MaxwellDMA::Launch() {
// TODO(Subv): Perform more research and implement all features of this engine.
const LaunchDMA& launch = regs.launch_dma;
- ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
ASSERT(regs.dst_params.origin.x == 0);
@@ -79,6 +78,7 @@ void MaxwellDMA::Launch() {
CopyPitchToBlockLinear();
}
}
+ ReleaseSemaphore();
}
void MaxwellDMA::CopyPitchToPitch() {
@@ -244,4 +244,22 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
+void MaxwellDMA::ReleaseSemaphore() {
+ const auto type = regs.launch_dma.semaphore_type;
+ const GPUVAddr address = regs.semaphore.address;
+ switch (type) {
+ case LaunchDMA::SemaphoreType::NONE:
+ break;
+ case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE:
+ memory_manager.Write<u32>(address, regs.semaphore.payload);
+ break;
+ case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE:
+ memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload));
+ memory_manager.Write<u64>(address + 8, system.GPU().GetTicks());
+ break;
+ default:
+ UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value()));
+ }
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index a04514425..2692cac8a 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -224,6 +224,8 @@ private:
void FastCopyBlockLinearToPitch();
+ void ReleaseSemaphore();
+
Core::System& system;
MemoryManager& memory_manager;
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 34dc6c596..f80d62c80 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -8,8 +8,6 @@
#include <queue>
#include "common/common_types.h"
-#include "common/settings.h"
-#include "core/core.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index fd3e41434..af05d47d1 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -14,6 +14,7 @@ set(SHADER_FILES
convert_d24s8_to_abgr8.frag
convert_depth_to_float.frag
convert_float_to_depth.frag
+ convert_s8d24_to_abgr8.frag
full_screen_triangle.vert
fxaa.frag
fxaa.vert
diff --git a/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
new file mode 100644
index 000000000..c8a1683b8
--- /dev/null
+++ b/src/video_core/host_shaders/convert_s8d24_to_abgr8.frag
@@ -0,0 +1,23 @@
+// Copyright 2022 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 450
+
+layout(binding = 0) uniform sampler2D depth_tex;
+layout(binding = 1) uniform isampler2D stencil_tex;
+
+layout(location = 0) out vec4 color;
+
+void main() {
+ ivec2 coord = ivec2(gl_FragCoord.xy);
+ uint depth = uint(textureLod(depth_tex, coord, 0).r * (exp2(24.0) - 1.0f));
+ uint stencil = uint(textureLod(stencil_tex, coord, 0).r);
+
+ highp uint depth_val =
+ uint(textureLod(depth_tex, coord, 0).r * (exp2(32.0) - 1.0));
+ lowp uint stencil_val = textureLod(stencil_tex, coord, 0).r;
+ highp uvec4 components =
+ uvec4((uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu, stencil_val);
+ color.rgba = vec4(components) / (exp2(8.0) - 1.0);
+}
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 392f82eb7..0173b54d8 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -18,7 +18,6 @@
#include "common/assert.h"
#include "common/settings.h"
-#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index bb204454e..c5f974080 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -5,9 +5,10 @@
#pragma once
#include <atomic>
+#include <functional>
#include <memory>
-#include <optional>
+#include "common/common_funcs.h"
#include "common/common_types.h"
#include "core/frontend/emu_window.h"
#include "video_core/gpu.h"
@@ -28,8 +29,11 @@ struct RendererSettings {
Layout::FramebufferLayout screenshot_framebuffer_layout;
};
-class RendererBase : NonCopyable {
+class RendererBase {
public:
+ YUZU_NON_COPYABLE(RendererBase);
+ YUZU_NON_MOVEABLE(RendererBase);
+
explicit RendererBase(Core::Frontend::EmuWindow& window,
std::unique_ptr<Core::Frontend::GraphicsContext> context);
virtual ~RendererBase();
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 151290101..293ad7d59 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -31,9 +31,8 @@ bool GLInnerFence::IsSignaled() const {
return true;
}
ASSERT(sync_object.handle != 0);
- GLsizei length;
GLint sync_status;
- glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
+ glGetSynciv(sync_object.handle, GL_SYNC_STATUS, 1, nullptr, &sync_status);
return sync_status == GL_SIGNALED;
}
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index f8495896c..9e6732abd 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -243,10 +243,6 @@ GraphicsPipeline::GraphicsPipeline(
case Settings::ShaderBackend::GLASM:
if (!sources[stage].empty()) {
assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
- if (in_parallel) {
- // Make sure program is built before continuing when building in parallel
- glGetString(GL_PROGRAM_ERROR_STRING_NV);
- }
}
break;
case Settings::ShaderBackend::SPIRV:
@@ -256,20 +252,18 @@ GraphicsPipeline::GraphicsPipeline(
break;
}
}
- if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
- // Make sure programs have built if we are building shaders in parallel
- for (OGLProgram& program : source_programs) {
- if (program.handle != 0) {
- GLint status{};
- glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
- }
- }
+ if (in_parallel) {
+ std::lock_guard lock{built_mutex};
+ built_fence.Create();
+ // Flush this context to ensure compilation commands and fence are in the GPU pipe.
+ glFlush();
+ built_condvar.notify_one();
+ } else {
+ is_built = true;
}
if (shader_notify) {
shader_notify->MarkShaderComplete();
}
- is_built = true;
- built_condvar.notify_one();
}};
if (thread_worker) {
thread_worker->QueueWork(std::move(func));
@@ -440,7 +434,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
- if (!is_built.load(std::memory_order::relaxed)) {
+ if (!IsBuilt()) {
WaitForBuild();
}
const bool use_assembly{assembly_programs[0].handle != 0};
@@ -585,8 +579,26 @@ void GraphicsPipeline::GenerateTransformFeedbackState() {
}
void GraphicsPipeline::WaitForBuild() {
- std::unique_lock lock{built_mutex};
- built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
+ if (built_fence.handle == 0) {
+ std::unique_lock lock{built_mutex};
+ built_condvar.wait(lock, [this] { return built_fence.handle != 0; });
+ }
+ ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED);
+ is_built = true;
+}
+
+bool GraphicsPipeline::IsBuilt() noexcept {
+ if (is_built) {
+ return true;
+ }
+ if (built_fence.handle == 0) {
+ return false;
+ }
+ // Timeout of zero means this is non-blocking
+ const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0);
+ ASSERT(sync_status != GL_WAIT_FAILED);
+ is_built = sync_status != GL_TIMEOUT_EXPIRED;
+ return is_built;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 4e28d9a42..311d49f3f 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -100,9 +100,7 @@ public:
return writes_global_memory;
}
- [[nodiscard]] bool IsBuilt() const noexcept {
- return is_built.load(std::memory_order::relaxed);
- }
+ [[nodiscard]] bool IsBuilt() noexcept;
template <typename Spec>
static auto MakeConfigureSpecFunc() {
@@ -154,7 +152,8 @@ private:
std::mutex built_mutex;
std::condition_variable built_condvar;
- std::atomic_bool is_built{false};
+ OGLSync built_fence{};
+ bool is_built{false};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index b2d5bfd3b..84e07f8bd 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -7,12 +7,14 @@
#include <string_view>
#include <utility>
#include <glad/glad.h>
-#include "common/common_types.h"
+#include "common/common_funcs.h"
namespace OpenGL {
-class OGLRenderbuffer : private NonCopyable {
+class OGLRenderbuffer final {
public:
+ YUZU_NON_COPYABLE(OGLRenderbuffer);
+
OGLRenderbuffer() = default;
OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -36,8 +38,10 @@ public:
GLuint handle = 0;
};
-class OGLTexture : private NonCopyable {
+class OGLTexture final {
public:
+ YUZU_NON_COPYABLE(OGLTexture);
+
OGLTexture() = default;
OGLTexture(OGLTexture&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -61,8 +65,10 @@ public:
GLuint handle = 0;
};
-class OGLTextureView : private NonCopyable {
+class OGLTextureView final {
public:
+ YUZU_NON_COPYABLE(OGLTextureView);
+
OGLTextureView() = default;
OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -86,8 +92,10 @@ public:
GLuint handle = 0;
};
-class OGLSampler : private NonCopyable {
+class OGLSampler final {
public:
+ YUZU_NON_COPYABLE(OGLSampler);
+
OGLSampler() = default;
OGLSampler(OGLSampler&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -111,8 +119,10 @@ public:
GLuint handle = 0;
};
-class OGLShader : private NonCopyable {
+class OGLShader final {
public:
+ YUZU_NON_COPYABLE(OGLShader);
+
OGLShader() = default;
OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -132,8 +142,10 @@ public:
GLuint handle = 0;
};
-class OGLProgram : private NonCopyable {
+class OGLProgram final {
public:
+ YUZU_NON_COPYABLE(OGLProgram);
+
OGLProgram() = default;
OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -154,8 +166,10 @@ public:
GLuint handle = 0;
};
-class OGLAssemblyProgram : private NonCopyable {
+class OGLAssemblyProgram final {
public:
+ YUZU_NON_COPYABLE(OGLAssemblyProgram);
+
OGLAssemblyProgram() = default;
OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -176,8 +190,10 @@ public:
GLuint handle = 0;
};
-class OGLPipeline : private NonCopyable {
+class OGLPipeline final {
public:
+ YUZU_NON_COPYABLE(OGLPipeline);
+
OGLPipeline() = default;
OGLPipeline(OGLPipeline&& o) noexcept : handle{std::exchange<GLuint>(o.handle, 0)} {}
@@ -198,8 +214,10 @@ public:
GLuint handle = 0;
};
-class OGLBuffer : private NonCopyable {
+class OGLBuffer final {
public:
+ YUZU_NON_COPYABLE(OGLBuffer);
+
OGLBuffer() = default;
OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -223,8 +241,10 @@ public:
GLuint handle = 0;
};
-class OGLSync : private NonCopyable {
+class OGLSync final {
public:
+ YUZU_NON_COPYABLE(OGLSync);
+
OGLSync() = default;
OGLSync(OGLSync&& o) noexcept : handle(std::exchange(o.handle, nullptr)) {}
@@ -247,8 +267,10 @@ public:
GLsync handle = 0;
};
-class OGLFramebuffer : private NonCopyable {
+class OGLFramebuffer final {
public:
+ YUZU_NON_COPYABLE(OGLFramebuffer);
+
OGLFramebuffer() = default;
OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
@@ -272,8 +294,10 @@ public:
GLuint handle = 0;
};
-class OGLQuery : private NonCopyable {
+class OGLQuery final {
public:
+ YUZU_NON_COPYABLE(OGLQuery);
+
OGLQuery() = default;
OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 5864c7c07..550ed6d36 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -9,7 +9,6 @@
#include <glad/glad.h>
#include "common/common_types.h"
-#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index daba42ed9..db5bf1d30 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -184,6 +184,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_FLOAT;
+ case Maxwell::VertexAttribute::Size::Size_11_11_10:
+ return GL_UNSIGNED_INT_10F_11F_11F_REV;
default:
break;
}
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 2c3914459..ec03cca38 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -9,6 +9,7 @@
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
+#include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
@@ -370,6 +371,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)),
+ convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)),
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
if (device.IsExtShaderStencilExportSupported()) {
@@ -474,6 +476,13 @@ void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
+void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
+ ImageView& src_image_view) {
+ ConvertPipelineColorTargetEx(convert_s8d24_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
+ convert_s8d24_to_abgr8_frag);
+ ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view);
+}
+
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 85e7dca5b..1a3944179 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -56,6 +56,8 @@ public:
void ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
+ void ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view);
+
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
@@ -99,6 +101,7 @@ private:
vk::ShaderModule convert_float_to_depth_frag;
vk::ShaderModule convert_abgr8_to_d24s8_frag;
vk::ShaderModule convert_d24s8_to_abgr8_frag;
+ vk::ShaderModule convert_s8d24_to_abgr8_frag;
vk::Sampler linear_sampler;
vk::Sampler nearest_sampler;
@@ -112,6 +115,7 @@ private:
vk::Pipeline convert_r16_to_d16_pipeline;
vk::Pipeline convert_abgr8_to_d24s8_pipeline;
vk::Pipeline convert_d24s8_to_abgr8_pipeline;
+ vk::Pipeline convert_s8d24_to_abgr8_pipeline;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 751e4792b..1c136c410 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -495,6 +495,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_SFLOAT;
+ case Maxwell::VertexAttribute::Size::Size_11_11_10:
+ return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
default:
break;
}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index c71a1f44d..621a6a071 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -100,6 +100,8 @@ VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
case Tegra::FramebufferConfig::PixelFormat::RGB565_UNORM:
return VK_FORMAT_R5G6B5_UNORM_PACK16;
+ case Tegra::FramebufferConfig::PixelFormat::B8G8R8A8_UNORM:
+ return VK_FORMAT_B8G8R8A8_UNORM;
default:
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
static_cast<u32>(framebuffer.pixel_format));
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 3e96c0f60..4d73427b4 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
#include <cstring>
#include <memory>
#include <optional>
@@ -292,7 +293,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
};
- const std::array push_constants{base_vertex, index_shift};
+ const std::array<u32, 2> push_constants{base_vertex, index_shift};
const VkDescriptorSet set = descriptor_allocator.Commit();
device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 3bfdf41ba..7d9d4f7ba 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -140,12 +140,12 @@ bool VKScheduler::UpdateRescaling(bool is_rescaling) {
void VKScheduler::WorkerThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("yuzu:VulkanWorker");
do {
- if (work_queue.empty()) {
- wait_cv.notify_all();
- }
std::unique_ptr<CommandChunk> work;
{
std::unique_lock lock{work_mutex};
+ if (work_queue.empty()) {
+ wait_cv.notify_all();
+ }
work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); });
if (stop_token.stop_requested()) {
continue;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 1b06c9296..e69aa136b 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -146,6 +146,7 @@ private:
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
+ recorded_counts++;
command_offset = Common::AlignUp(command_offset, alignof(FuncType));
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
@@ -167,7 +168,7 @@ private:
}
bool Empty() const {
- return command_offset == 0;
+ return recorded_counts == 0;
}
bool HasSubmit() const {
@@ -178,6 +179,7 @@ private:
Command* first = nullptr;
Command* last = nullptr;
+ size_t recorded_counts = 0;
size_t command_offset = 0;
bool submit = false;
alignas(std::max_align_t) std::array<u8, 0x8000> data{};
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 40a149832..8240c83e1 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -8,7 +8,6 @@
#include <limits>
#include "common/common_types.h"
-#include "core/core.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 0ba56ff1e..8101eb42c 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -554,10 +554,12 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
};
}
-[[nodiscard]] bool IsFormatFlipped(PixelFormat format) {
+[[nodiscard]] bool IsFormatFlipped(PixelFormat format, bool emulate_bgr565) {
switch (format) {
case PixelFormat::A1B5G5R5_UNORM:
return true;
+ case PixelFormat::B5G6R5_UNORM:
+ return emulate_bgr565;
default:
return false;
}
@@ -1068,6 +1070,9 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) {
return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view);
}
+ if (src_view.format == PixelFormat::D24_UNORM_S8_UINT) {
+ return blit_image_helper.ConvertS8D24ToABGR8(dst, src_view);
+ }
break;
case PixelFormat::R32_FLOAT:
if (src_view.format == PixelFormat::D32_FLOAT) {
@@ -1488,7 +1493,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
};
if (!info.IsRenderTarget()) {
swizzle = info.Swizzle();
- if (IsFormatFlipped(format)) {
+ if (IsFormatFlipped(format, device->MustEmulateBGR565())) {
std::ranges::transform(swizzle, swizzle.begin(), SwapBlueRed);
}
if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) {
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 329bf4def..2f2594585 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -50,6 +50,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor
gpu->BindRenderer(std::move(renderer));
return gpu;
} catch (const std::runtime_error& exception) {
+ scope.Cancel();
LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what());
return nullptr;
}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 153702c0b..effde73c9 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -39,6 +39,11 @@ constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_FORMAT_UNDEFINED,
};
+
+constexpr std::array B5G6R5_UNORM_PACK16{
+ VK_FORMAT_R5G6B5_UNORM_PACK16,
+ VK_FORMAT_UNDEFINED,
+};
} // namespace Alternatives
enum class NvidiaArchitecture {
@@ -87,6 +92,8 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data();
case VK_FORMAT_D16_UNORM_S8_UINT:
return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data();
+ case VK_FORMAT_B5G6R5_UNORM_PACK16:
+ return Alternatives::B5G6R5_UNORM_PACK16.data();
default:
return nullptr;
}
@@ -224,9 +231,14 @@ std::vector<std::string> GetSupportedExtensions(vk::PhysicalDevice physical) {
return supported_extensions;
}
+bool IsExtensionSupported(std::span<const std::string> supported_extensions,
+ std::string_view extension) {
+ return std::ranges::find(supported_extensions, extension) != supported_extensions.end();
+}
+
NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
std::span<const std::string> exts) {
- if (std::ranges::find(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME) != exts.end()) {
+ if (IsExtensionSupported(exts, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) {
VkPhysicalDeviceFragmentShadingRatePropertiesKHR shading_rate_props{};
shading_rate_props.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
@@ -239,7 +251,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
return NvidiaArchitecture::AmpereOrNewer;
}
}
- if (std::ranges::find(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME) != exts.end()) {
+ if (IsExtensionSupported(exts, VK_NV_SHADING_RATE_IMAGE_EXTENSION_NAME)) {
return NvidiaArchitecture::Turing;
}
return NvidiaArchitecture::VoltaOrOlder;
@@ -604,7 +616,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
break;
}
}
- if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
+ const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV;
+ if (ext_extended_dynamic_state && is_radv) {
// Mask driver version variant
const u32 version = (properties.driverVersion << 3) >> 3;
if (version < VK_MAKE_API_VERSION(0, 21, 2, 0)) {
@@ -613,6 +626,17 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
ext_extended_dynamic_state = false;
}
}
+ if (ext_vertex_input_dynamic_state && is_radv) {
+ // TODO(ameerj): Blacklist only offending driver versions
+ // TODO(ameerj): Confirm if RDNA1 is affected
+ const bool is_rdna2 =
+ IsExtensionSupported(supported_extensions, VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME);
+ if (is_rdna2) {
+ LOG_WARNING(Render_Vulkan,
+ "RADV has broken VK_EXT_vertex_input_dynamic_state on RDNA2 hardware");
+ ext_vertex_input_dynamic_state = false;
+ }
+ }
sets_per_pool = 64;
const bool is_amd =
@@ -628,7 +652,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
has_broken_cube_compatibility = true;
}
}
- const bool is_amd_or_radv = is_amd || driver_id == VK_DRIVER_ID_MESA_RADV;
+ const bool is_amd_or_radv = is_amd || is_radv;
if (ext_sampler_filter_minmax && is_amd_or_radv) {
// Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken.
if (!is_float16_supported) {
@@ -639,6 +663,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
const bool is_intel_windows = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS;
+ const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
if (ext_vertex_input_dynamic_state && is_intel_windows) {
LOG_WARNING(Render_Vulkan, "Blacklisting Intel for VK_EXT_vertex_input_dynamic_state");
ext_vertex_input_dynamic_state = false;
@@ -652,6 +677,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
cant_blit_msaa = true;
}
+ if (is_intel_anv) {
+ LOG_WARNING(Render_Vulkan, "ANV driver does not support native BGR format");
+ must_emulate_bgr565 = true;
+ }
supports_d24_depth =
IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT,
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 37d140ebd..34b1add16 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -354,6 +354,10 @@ public:
return cant_blit_msaa;
}
+ bool MustEmulateBGR565() const {
+ return must_emulate_bgr565;
+ }
+
private:
/// Checks if the physical device is suitable.
void CheckSuitability(bool requires_swapchain) const;
@@ -448,6 +452,7 @@ private:
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
bool supports_d24_depth{}; ///< Supports D24 depth buffers.
bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
+ bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.