diff options
Diffstat (limited to 'src/video_core')
38 files changed, 743 insertions, 281 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3e8bd0060..bf6439530 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -281,7 +281,7 @@ create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) target_link_libraries(video_core PUBLIC glad shader_recompiler stb) -if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) +if (YUZU_USE_BUNDLED_FFMPEG AND NOT (WIN32 OR ANDROID)) add_dependencies(video_core ffmpeg-build) endif() @@ -345,3 +345,7 @@ endif() if (YUZU_ENABLE_LTO) set_property(TARGET video_core PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) endif() + +if (ANDROID AND ARCHITECTURE_arm64) + target_link_libraries(video_core PRIVATE adrenotools) +endif() diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 40db243d2..4b4f7061b 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -2,6 +2,8 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "common/microprofile.h" +#include "video_core/buffer_cache/buffer_cache_base.h" +#include "video_core/control/channel_state_cache.inc" namespace VideoCommon { @@ -9,4 +11,6 @@ MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 12 MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128)); MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128)); +template class VideoCommon::ChannelSetupCaches<VideoCommon::BufferCacheChannelInfo>; + } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 08bc66aaa..2f281b370 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -63,18 +63,27 @@ void BufferCache<P>::RunGarbageCollector() { template <class P> void BufferCache<P>::TickFrame() { + // Homebrew console apps don't create or bind any channels, so this will be nullptr. + if (!channel_state) { + return; + } + // Calculate hits and shots and move hit bits to the right - const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); - const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); - std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, - uniform_cache_hits.begin() + 1); - std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, - uniform_cache_shots.begin() + 1); - uniform_cache_hits[0] = 0; - uniform_cache_shots[0] = 0; + const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(), + channel_state->uniform_cache_hits.end()); + const u32 shots = std::reduce(channel_state->uniform_cache_shots.begin(), + channel_state->uniform_cache_shots.end()); + std::copy_n(channel_state->uniform_cache_hits.begin(), + channel_state->uniform_cache_hits.size() - 1, + channel_state->uniform_cache_hits.begin() + 1); + std::copy_n(channel_state->uniform_cache_shots.begin(), + channel_state->uniform_cache_shots.size() - 1, + channel_state->uniform_cache_shots.begin() + 1); + channel_state->uniform_cache_hits[0] = 0; + channel_state->uniform_cache_shots[0] = 0; const bool skip_preferred = hits * 256 < shots * 251; - uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; + channel_state->uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; // If we can obtain the memory info, use it instead of the estimate. if (runtime.CanReportMemoryUsage()) { @@ -164,10 +173,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am BufferId buffer_a; BufferId buffer_b; do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount)); buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount)); - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); auto& src_buffer = slot_buffers[buffer_a]; auto& dest_buffer = slot_buffers[buffer_b]; SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount)); @@ -272,30 +281,30 @@ void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr .size = size, .buffer_id = BufferId{}, }; - uniform_buffers[stage][index] = binding; + channel_state->uniform_buffers[stage][index] = binding; } template <class P> void BufferCache<P>::DisableGraphicsUniformBuffer(size_t stage, u32 index) { - uniform_buffers[stage][index] = NULL_BINDING; + channel_state->uniform_buffers[stage][index] = NULL_BINDING; } template <class P> void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) { MICROPROFILE_SCOPE(GPU_PrepareBuffers); do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; DoUpdateGraphicsBuffers(is_indexed); - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); } template <class P> void BufferCache<P>::UpdateComputeBuffers() { MICROPROFILE_SCOPE(GPU_PrepareBuffers); do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; DoUpdateComputeBuffers(); - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); } template <class P> @@ -338,98 +347,102 @@ template <class P> void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffer_masks != mask) { + if (channel_state->enabled_uniform_buffer_masks != mask) { if constexpr (IS_OPENGL) { - fast_bound_uniform_buffers.fill(0); + channel_state->fast_bound_uniform_buffers.fill(0); } - dirty_uniform_buffers.fill(~u32{0}); - uniform_buffer_binding_sizes.fill({}); + channel_state->dirty_uniform_buffers.fill(~u32{0}); + channel_state->uniform_buffer_binding_sizes.fill({}); } } - enabled_uniform_buffer_masks = mask; - uniform_buffer_sizes = sizes; + channel_state->enabled_uniform_buffer_masks = mask; + channel_state->uniform_buffer_sizes = sizes; } template <class P> void BufferCache<P>::SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes) { - enabled_compute_uniform_buffer_mask = mask; - compute_uniform_buffer_sizes = sizes; + channel_state->enabled_compute_uniform_buffer_mask = mask; + channel_state->compute_uniform_buffer_sizes = sizes; } template <class P> void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) { - enabled_storage_buffers[stage] = 0; - written_storage_buffers[stage] = 0; + channel_state->enabled_storage_buffers[stage] = 0; + channel_state->written_storage_buffers[stage] = 0; } template <class P> void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written) { - enabled_storage_buffers[stage] |= 1U << ssbo_index; - written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index; + channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index; + channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index; const auto& cbufs = maxwell3d->state.shader_stages[stage]; const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset; - storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written); + channel_state->storage_buffers[stage][ssbo_index] = + StorageBufferBinding(ssbo_addr, cbuf_index, is_written); } template <class P> void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { - enabled_texture_buffers[stage] = 0; - written_texture_buffers[stage] = 0; - image_texture_buffers[stage] = 0; + channel_state->enabled_texture_buffers[stage] = 0; + channel_state->written_texture_buffers[stage] = 0; + channel_state->image_texture_buffers[stage] = 0; } template <class P> void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, bool is_written, bool is_image) { - enabled_texture_buffers[stage] |= 1U << tbo_index; - written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + channel_state->enabled_texture_buffers[stage] |= 1U << tbo_index; + channel_state->written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; + channel_state->image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; } - texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); + channel_state->texture_buffers[stage][tbo_index] = + GetTextureBufferBinding(gpu_addr, size, format); } template <class P> void BufferCache<P>::UnbindComputeStorageBuffers() { - enabled_compute_storage_buffers = 0; - written_compute_storage_buffers = 0; - image_compute_texture_buffers = 0; + channel_state->enabled_compute_storage_buffers = 0; + channel_state->written_compute_storage_buffers = 0; + channel_state->image_compute_texture_buffers = 0; } template <class P> void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written) { - enabled_compute_storage_buffers |= 1U << ssbo_index; - written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; + channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index; + channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; const auto& launch_desc = kepler_compute->launch_description; ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); const auto& cbufs = launch_desc.const_buffer_config; const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset; - compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written); + channel_state->compute_storage_buffers[ssbo_index] = + StorageBufferBinding(ssbo_addr, cbuf_index, is_written); } template <class P> void BufferCache<P>::UnbindComputeTextureBuffers() { - enabled_compute_texture_buffers = 0; - written_compute_texture_buffers = 0; - image_compute_texture_buffers = 0; + channel_state->enabled_compute_texture_buffers = 0; + channel_state->written_compute_texture_buffers = 0; + channel_state->image_compute_texture_buffers = 0; } template <class P> void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, bool is_written, bool is_image) { - enabled_compute_texture_buffers |= 1U << tbo_index; - written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + channel_state->enabled_compute_texture_buffers |= 1U << tbo_index; + channel_state->written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + channel_state->image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; } - compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); + channel_state->compute_texture_buffers[tbo_index] = + GetTextureBufferBinding(gpu_addr, size, format); } template <class P> @@ -670,10 +683,10 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { template <class P> void BufferCache<P>::BindHostIndexBuffer() { - Buffer& buffer = slot_buffers[index_buffer.buffer_id]; - TouchBuffer(buffer, index_buffer.buffer_id); - const u32 offset = buffer.Offset(index_buffer.cpu_addr); - const u32 size = index_buffer.size; + Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id]; + TouchBuffer(buffer, channel_state->index_buffer.buffer_id); + const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr); + const u32 size = channel_state->index_buffer.size; const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { @@ -687,7 +700,7 @@ void BufferCache<P>::BindHostIndexBuffer() { buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); } } else { - SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); + SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size); } if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { const u32 new_offset = @@ -704,7 +717,7 @@ template <class P> void BufferCache<P>::BindHostVertexBuffers() { auto& flags = maxwell3d->dirty.flags; for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { - const Binding& binding = vertex_buffers[index]; + const Binding& binding = channel_state->vertex_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); @@ -727,19 +740,19 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() { SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); }; if (current_draw_indirect->include_count) { - bind_buffer(count_buffer_binding); + bind_buffer(channel_state->count_buffer_binding); } - bind_buffer(indirect_buffer_binding); + bind_buffer(channel_state->indirect_buffer_binding); } template <class P> void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { u32 dirty = ~0U; if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - dirty = std::exchange(dirty_uniform_buffers[stage], 0); + dirty = std::exchange(channel_state->dirty_uniform_buffers[stage], 0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { + ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) { const bool needs_bind = ((dirty >> index) & 1) != 0; BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { @@ -751,13 +764,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { template <class P> void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind) { - const Binding& binding = uniform_buffers[stage][index]; + const Binding& binding = channel_state->uniform_buffers[stage][index]; const VAddr cpu_addr = binding.cpu_addr; - const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); + const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && - size <= uniform_buffer_skip_cache_size && + size <= channel_state->uniform_buffer_skip_cache_size && !memory_tracker.IsRegionGpuModified(cpu_addr, size); if (use_fast_buffer) { if constexpr (IS_OPENGL) { @@ -765,11 +778,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // Fast path for Nvidia const bool should_fast_bind = !HasFastUniformBufferBound(stage, binding_index) || - uniform_buffer_binding_sizes[stage][binding_index] != size; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size; if (should_fast_bind) { // We only have to bind when the currently bound buffer is not the fast version - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - uniform_buffer_binding_sizes[stage][binding_index] = size; + channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -778,8 +791,8 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } } if constexpr (IS_OPENGL) { - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - uniform_buffer_binding_sizes[stage][binding_index] = size; + channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); @@ -789,15 +802,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 // Classic cached path const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); if (sync_cached) { - ++uniform_cache_hits[0]; + ++channel_state->uniform_cache_hits[0]; } - ++uniform_cache_shots[0]; + ++channel_state->uniform_cache_shots[0]; // Skip binding if it's not needed and if the bound buffer is not the fast version // This exists to avoid instances where the fast buffer is bound and a GPU write happens needs_bind |= HasFastUniformBufferBound(stage, binding_index); if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; + needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size; } if (!needs_bind) { return; @@ -805,14 +818,14 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const u32 offset = buffer.Offset(cpu_addr); if constexpr (IS_OPENGL) { // Fast buffer will be unbound - fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); + channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); // Mark the index as dirty if offset doesn't match const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); - dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; + channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; } if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - uniform_buffer_binding_sizes[stage][binding_index] = size; + channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size; } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); @@ -824,15 +837,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 template <class P> void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { u32 binding_index = 0; - ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { - const Binding& binding = storage_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { + const Binding& binding = channel_state->storage_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); - const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0; + const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0; if constexpr (NEEDS_BIND_STORAGE_INDEX) { runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written); ++binding_index; @@ -844,8 +857,8 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { template <class P> void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { - ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { - const TextureBufferBinding& binding = texture_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { + const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -853,7 +866,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - if (((image_texture_buffers[stage] >> index) & 1) != 0) { + if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) { runtime.BindImageBuffer(buffer, offset, size, format); } else { runtime.BindTextureBuffer(buffer, offset, size, format); @@ -870,7 +883,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { return; } for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { - const Binding& binding = transform_feedback_buffers[index]; + const Binding& binding = channel_state->transform_feedback_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; @@ -885,15 +898,16 @@ template <class P> void BufferCache<P>::BindHostComputeUniformBuffers() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { // Mark all uniform buffers as dirty - dirty_uniform_buffers.fill(~u32{0}); - fast_bound_uniform_buffers.fill(0); + channel_state->dirty_uniform_buffers.fill(~u32{0}); + channel_state->fast_bound_uniform_buffers.fill(0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { - const Binding& binding = compute_uniform_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) { + const Binding& binding = channel_state->compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); - const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); + const u32 size = + std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); @@ -909,15 +923,16 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { template <class P> void BufferCache<P>::BindHostComputeStorageBuffers() { u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { - const Binding& binding = compute_storage_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { + const Binding& binding = channel_state->compute_storage_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); - const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0; + const bool is_written = + ((channel_state->written_compute_storage_buffers >> index) & 1) != 0; if constexpr (NEEDS_BIND_STORAGE_INDEX) { runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written); ++binding_index; @@ -929,8 +944,8 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { template <class P> void BufferCache<P>::BindHostComputeTextureBuffers() { - ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { - const TextureBufferBinding& binding = compute_texture_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { + const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -938,7 +953,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { const u32 offset = buffer.Offset(binding.cpu_addr); const PixelFormat format = binding.format; if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { - if (((image_compute_texture_buffers >> index) & 1) != 0) { + if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) { runtime.BindImageBuffer(buffer, offset, size, format); } else { runtime.BindTextureBuffer(buffer, offset, size, format); @@ -952,7 +967,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { template <class P> void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { do { - has_deleted_buffers = false; + channel_state->has_deleted_buffers = false; if (is_indexed) { UpdateIndexBuffer(); } @@ -966,7 +981,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { if (current_draw_indirect) { UpdateDrawIndirect(); } - } while (has_deleted_buffers); + } while (channel_state->has_deleted_buffers); } template <class P> @@ -997,7 +1012,7 @@ void BufferCache<P>::UpdateIndexBuffer() { slot_buffers.erase(inline_buffer_id); inline_buffer_id = CreateBuffer(0, buffer_size); } - index_buffer = Binding{ + channel_state->index_buffer = Binding{ .cpu_addr = 0, .size = inline_index_size, .buffer_id = inline_buffer_id, @@ -1013,10 +1028,10 @@ void BufferCache<P>::UpdateIndexBuffer() { (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); const u32 size = std::min(address_size, draw_size); if (size == 0 || !cpu_addr) { - index_buffer = NULL_BINDING; + channel_state->index_buffer = NULL_BINDING; return; } - index_buffer = Binding{ + channel_state->index_buffer = Binding{ .cpu_addr = *cpu_addr, .size = size, .buffer_id = FindBuffer(*cpu_addr, size), @@ -1049,13 +1064,13 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); u32 size = address_size; // TODO: Analyze stride and number of vertices if (array.enable == 0 || size == 0 || !cpu_addr) { - vertex_buffers[index] = NULL_BINDING; + channel_state->vertex_buffers[index] = NULL_BINDING; return; } if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); } - vertex_buffers[index] = Binding{ + channel_state->vertex_buffers[index] = Binding{ .cpu_addr = *cpu_addr, .size = size, .buffer_id = FindBuffer(*cpu_addr, size), @@ -1077,23 +1092,24 @@ void BufferCache<P>::UpdateDrawIndirect() { }; }; if (current_draw_indirect->include_count) { - update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding); + update(current_draw_indirect->count_start_address, sizeof(u32), + channel_state->count_buffer_binding); } update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size, - indirect_buffer_binding); + channel_state->indirect_buffer_binding); } template <class P> void BufferCache<P>::UpdateUniformBuffers(size_t stage) { - ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { - Binding& binding = uniform_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) { + Binding& binding = channel_state->uniform_buffers[stage][index]; if (binding.buffer_id) { // Already updated return; } // Mark as dirty if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - dirty_uniform_buffers[stage] |= 1U << index; + channel_state->dirty_uniform_buffers[stage] |= 1U << index; } // Resolve buffer binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); @@ -1102,10 +1118,10 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) { template <class P> void BufferCache<P>::UpdateStorageBuffers(size_t stage) { - const u32 written_mask = written_storage_buffers[stage]; - ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { + const u32 written_mask = channel_state->written_storage_buffers[stage]; + ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) { // Resolve buffer - Binding& binding = storage_buffers[stage][index]; + Binding& binding = channel_state->storage_buffers[stage][index]; const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); binding.buffer_id = buffer_id; // Mark buffer as written if needed @@ -1117,11 +1133,11 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) { template <class P> void BufferCache<P>::UpdateTextureBuffers(size_t stage) { - ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { - Binding& binding = texture_buffers[stage][index]; + ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) { + Binding& binding = channel_state->texture_buffers[stage][index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark buffer as written if needed - if (((written_texture_buffers[stage] >> index) & 1) != 0) { + if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) { MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); @@ -1144,11 +1160,11 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { const u32 size = binding.size; const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (binding.enable == 0 || size == 0 || !cpu_addr) { - transform_feedback_buffers[index] = NULL_BINDING; + channel_state->transform_feedback_buffers[index] = NULL_BINDING; return; } const BufferId buffer_id = FindBuffer(*cpu_addr, size); - transform_feedback_buffers[index] = Binding{ + channel_state->transform_feedback_buffers[index] = Binding{ .cpu_addr = *cpu_addr, .size = size, .buffer_id = buffer_id, @@ -1158,8 +1174,8 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { template <class P> void BufferCache<P>::UpdateComputeUniformBuffers() { - ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { - Binding& binding = compute_uniform_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) { + Binding& binding = channel_state->compute_uniform_buffers[index]; binding = NULL_BINDING; const auto& launch_desc = kepler_compute->launch_description; if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { @@ -1176,12 +1192,12 @@ void BufferCache<P>::UpdateComputeUniformBuffers() { template <class P> void BufferCache<P>::UpdateComputeStorageBuffers() { - ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { + ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer - Binding& binding = compute_storage_buffers[index]; + Binding& binding = channel_state->compute_storage_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed - if (((written_compute_storage_buffers >> index) & 1) != 0) { + if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) { MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); @@ -1189,11 +1205,11 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { template <class P> void BufferCache<P>::UpdateComputeTextureBuffers() { - ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { - Binding& binding = compute_texture_buffers[index]; + ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) { + Binding& binding = channel_state->compute_texture_buffers[index]; binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed - if (((written_compute_texture_buffers >> index) & 1) != 0) { + if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) { MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); @@ -1260,7 +1276,7 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu const VAddr overlap_cpu_addr = overlap.CpuAddr(); const bool expands_left = overlap_cpu_addr < begin; if (expands_left) { - cpu_addr = begin = overlap_cpu_addr; + begin = overlap_cpu_addr; } const VAddr overlap_end = overlap_cpu_addr + overlap.SizeBytes(); const bool expands_right = overlap_end > end; @@ -1274,7 +1290,7 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu has_stream_leap = true; if (expands_right) { begin -= CACHING_PAGESIZE * 256; - cpu_addr = begin; + cpu_addr = begin - CACHING_PAGESIZE; } if (expands_left) { end += CACHING_PAGESIZE * 256; @@ -1297,7 +1313,7 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, if (accumulate_stream_score) { new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); } - boost::container::small_vector<BufferCopy, 1> copies; + boost::container::small_vector<BufferCopy, 10> copies; const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); copies.push_back(BufferCopy{ .src_offset = 0, @@ -1608,13 +1624,13 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { const auto replace = [scalar_replace](std::span<Binding> bindings) { std::ranges::for_each(bindings, scalar_replace); }; - scalar_replace(index_buffer); - replace(vertex_buffers); - std::ranges::for_each(uniform_buffers, replace); - std::ranges::for_each(storage_buffers, replace); - replace(transform_feedback_buffers); - replace(compute_uniform_buffers); - replace(compute_storage_buffers); + scalar_replace(channel_state->index_buffer); + replace(channel_state->vertex_buffers); + std::ranges::for_each(channel_state->uniform_buffers, replace); + std::ranges::for_each(channel_state->storage_buffers, replace); + replace(channel_state->transform_feedback_buffers); + replace(channel_state->compute_uniform_buffers); + replace(channel_state->compute_storage_buffers); // Mark the whole buffer as CPU written to stop tracking CPU writes if (!do_not_mark) { @@ -1632,8 +1648,8 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { template <class P> void BufferCache<P>::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - dirty_uniform_buffers.fill(~u32{0}); - uniform_buffer_binding_sizes.fill({}); + channel_state->dirty_uniform_buffers.fill(~u32{0}); + channel_state->uniform_buffer_binding_sizes.fill({}); } auto& flags = maxwell3d->dirty.flags; flags[Dirty::IndexBuffer] = true; @@ -1641,13 +1657,12 @@ void BufferCache<P>::NotifyBufferDeletion() { for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { flags[Dirty::VertexBuffer0 + index] = true; } - has_deleted_buffers = true; + channel_state->has_deleted_buffers = true; } template <class P> -typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, - u32 cbuf_index, - bool is_written) const { +Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, + bool is_written) const { const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); const auto size = [&]() { const bool is_nvn_cbuf = cbuf_index == 0; @@ -1679,8 +1694,8 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s } template <class P> -typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding( - GPUVAddr gpu_addr, u32 size, PixelFormat format) { +TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, + PixelFormat format) { const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); TextureBufferBinding binding; if (!cpu_addr || size == 0) { @@ -1719,7 +1734,7 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) { template <class P> bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept { if constexpr (IS_OPENGL) { - return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; + return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; } else { // Only OpenGL has fast uniform buffers return false; @@ -1728,14 +1743,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) template <class P> std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { - auto& buffer = slot_buffers[count_buffer_binding.buffer_id]; - return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr)); + auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; + return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr)); } template <class P> std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { - auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id]; - return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr)); + auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; + return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr)); } } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 7c6ef49d5..60a1f285e 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -86,8 +86,78 @@ enum class ObtainBufferOperation : u32 { MarkQuery = 3, }; -template <typename P> -class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { +static constexpr BufferId NULL_BUFFER_ID{0}; +static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); + +struct Binding { + VAddr cpu_addr{}; + u32 size{}; + BufferId buffer_id; +}; + +struct TextureBufferBinding : Binding { + PixelFormat format; +}; + +static constexpr Binding NULL_BINDING{ + .cpu_addr = 0, + .size = 0, + .buffer_id = NULL_BUFFER_ID, +}; + +class BufferCacheChannelInfo : public ChannelInfo { +public: + BufferCacheChannelInfo() = delete; + BufferCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept : ChannelInfo(state) {} + BufferCacheChannelInfo(const BufferCacheChannelInfo& state) = delete; + BufferCacheChannelInfo& operator=(const BufferCacheChannelInfo&) = delete; + + Binding index_buffer; + std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; + std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; + std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; + std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; + std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; + Binding count_buffer_binding; + Binding indirect_buffer_binding; + + std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; + std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; + std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; + + std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; + u32 enabled_compute_uniform_buffer_mask = 0; + + const UniformBufferSizes* uniform_buffer_sizes{}; + const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; + + std::array<u32, NUM_STAGES> enabled_storage_buffers{}; + std::array<u32, NUM_STAGES> written_storage_buffers{}; + u32 enabled_compute_storage_buffers = 0; + u32 written_compute_storage_buffers = 0; + + std::array<u32, NUM_STAGES> enabled_texture_buffers{}; + std::array<u32, NUM_STAGES> written_texture_buffers{}; + std::array<u32, NUM_STAGES> image_texture_buffers{}; + u32 enabled_compute_texture_buffers = 0; + u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; + + std::array<u32, 16> uniform_cache_hits{}; + std::array<u32, 16> uniform_cache_shots{}; + + u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; + + bool has_deleted_buffers = false; + + std::array<u32, NUM_STAGES> dirty_uniform_buffers{}; + std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; + std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> + uniform_buffer_binding_sizes{}; +}; + +template <class P> +class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInfo> { // Page size for caching purposes. // This is unrelated to the CPU page size and it can be changed as it seems optimal. static constexpr u32 CACHING_PAGEBITS = 16; @@ -105,8 +175,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS; - static constexpr BufferId NULL_BUFFER_ID{0}; - static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; static constexpr s64 TARGET_THRESHOLD = 4_GiB; @@ -150,8 +218,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI using OverlapSection = boost::icl::inter_section<int>; using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; - struct Empty {}; - struct OverlapResult { std::vector<BufferId> ids; VAddr begin; @@ -159,25 +225,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI bool has_stream_leap = false; }; - struct Binding { - VAddr cpu_addr{}; - u32 size{}; - BufferId buffer_id; - }; - - struct TextureBufferBinding : Binding { - PixelFormat format; - }; - - static constexpr Binding NULL_BINDING{ - .cpu_addr = 0, - .size = 0, - .buffer_id = NULL_BUFFER_ID, - }; - public: - static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); - explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_); @@ -497,51 +545,6 @@ private: u32 last_index_count = 0; - Binding index_buffer; - std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; - std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; - std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; - std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; - std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; - Binding count_buffer_binding; - Binding indirect_buffer_binding; - - std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; - std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; - std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; - - std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; - u32 enabled_compute_uniform_buffer_mask = 0; - - const UniformBufferSizes* uniform_buffer_sizes{}; - const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; - - std::array<u32, NUM_STAGES> enabled_storage_buffers{}; - std::array<u32, NUM_STAGES> written_storage_buffers{}; - u32 enabled_compute_storage_buffers = 0; - u32 written_compute_storage_buffers = 0; - - std::array<u32, NUM_STAGES> enabled_texture_buffers{}; - std::array<u32, NUM_STAGES> written_texture_buffers{}; - std::array<u32, NUM_STAGES> image_texture_buffers{}; - u32 enabled_compute_texture_buffers = 0; - u32 written_compute_texture_buffers = 0; - u32 image_compute_texture_buffers = 0; - - std::array<u32, 16> uniform_cache_hits{}; - std::array<u32, 16> uniform_cache_shots{}; - - u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; - - bool has_deleted_buffers = false; - - std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> - dirty_uniform_buffers{}; - std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; - std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, - std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> - uniform_buffer_binding_sizes{}; - MemoryTracker memory_tracker; IntervalSet uncommitted_ranges; IntervalSet common_ranges; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2f986097f..62d70e9f3 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -593,6 +593,12 @@ void Maxwell3D::ProcessQueryCondition() { } void Maxwell3D::ProcessCounterReset() { +#if ANDROID + if (!Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + return; + } +#endif switch (regs.clear_report_value) { case Regs::ClearReport::ZPassPixelCount: rasterizer->ResetCounter(QueryType::SamplesPassed); @@ -614,6 +620,12 @@ std::optional<u64> Maxwell3D::GetQueryResult() { case Regs::ReportSemaphore::Report::Payload: return regs.report_semaphore.payload; case Regs::ReportSemaphore::Report::ZPassPixelCount64: +#if ANDROID + if (!Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + return 120; + } +#endif // Deferred. rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed, system.GPU().GetTicks()); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 295a416a8..456f733cf 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -14,6 +14,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "core/hle/service/nvdrv/nvdata.h" #include "core/perf_stats.h" #include "video_core/cdma_pusher.h" diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 3c5317777..889144f38 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -7,7 +7,7 @@ #include "common/settings.h" #include "common/thread.h" #include "core/core.h" -#include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "video_core/control/scheduler.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index 3e9022dce..cd6a3a9b8 100644 --- a/src/video_core/host1x/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -5,6 +5,7 @@ #include <fstream> #include <vector> #include "common/assert.h" +#include "common/scope_exit.h" #include "common/settings.h" #include "video_core/host1x/codecs/codec.h" #include "video_core/host1x/codecs/h264.h" @@ -14,6 +15,8 @@ #include "video_core/memory_manager.h" extern "C" { +#include <libavfilter/buffersink.h> +#include <libavfilter/buffersrc.h> #include <libavutil/opt.h> #ifdef LIBVA_FOUND // for querying VAAPI driver information @@ -85,6 +88,10 @@ Codec::~Codec() { // Free libav memory avcodec_free_context(&av_codec_ctx); av_buffer_unref(&av_gpu_decoder); + + if (filters_initialized) { + avfilter_graph_free(&av_filter_graph); + } } bool Codec::CreateGpuAvDevice() { @@ -167,6 +174,62 @@ void Codec::InitializeGpuDecoder() { av_codec_ctx->get_format = GetGpuFormat; } +void Codec::InitializeAvFilters(AVFrame* frame) { + const AVFilter* buffer_src = avfilter_get_by_name("buffer"); + const AVFilter* buffer_sink = avfilter_get_by_name("buffersink"); + AVFilterInOut* inputs = avfilter_inout_alloc(); + AVFilterInOut* outputs = avfilter_inout_alloc(); + SCOPE_EXIT({ + avfilter_inout_free(&inputs); + avfilter_inout_free(&outputs); + }); + + // Don't know how to get the accurate time_base but it doesn't matter for yadif filter + // so just use 1/1 to make buffer filter happy + std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame->width, + frame->height, frame->format); + + av_filter_graph = avfilter_graph_alloc(); + int ret = avfilter_graph_create_filter(&av_filter_src_ctx, buffer_src, "in", args.c_str(), + nullptr, av_filter_graph); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter source error: {}", ret); + return; + } + + ret = avfilter_graph_create_filter(&av_filter_sink_ctx, buffer_sink, "out", nullptr, nullptr, + av_filter_graph); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_create_filter sink error: {}", ret); + return; + } + + inputs->name = av_strdup("out"); + inputs->filter_ctx = av_filter_sink_ctx; + inputs->pad_idx = 0; + inputs->next = nullptr; + + outputs->name = av_strdup("in"); + outputs->filter_ctx = av_filter_src_ctx; + outputs->pad_idx = 0; + outputs->next = nullptr; + + const char* description = "yadif=1:-1:0"; + ret = avfilter_graph_parse_ptr(av_filter_graph, description, &inputs, &outputs, nullptr); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_parse_ptr error: {}", ret); + return; + } + + ret = avfilter_graph_config(av_filter_graph, nullptr); + if (ret < 0) { + LOG_ERROR(Service_NVDRV, "avfilter_graph_config error: {}", ret); + return; + } + + filters_initialized = true; +} + void Codec::Initialize() { const AVCodecID codec = [&] { switch (current_codec) { @@ -271,8 +334,34 @@ void Codec::Decode() { UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); return; } - av_frames.push(std::move(final_frame)); - if (av_frames.size() > 10) { + if (!final_frame->interlaced_frame) { + av_frames.push(std::move(final_frame)); + } else { + if (!filters_initialized) { + InitializeAvFilters(final_frame.get()); + } + if (const int ret = av_buffersrc_add_frame_flags(av_filter_src_ctx, final_frame.get(), + AV_BUFFERSRC_FLAG_KEEP_REF); + ret) { + LOG_DEBUG(Service_NVDRV, "av_buffersrc_add_frame_flags error {}", ret); + return; + } + while (true) { + auto filter_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; + + int ret = av_buffersink_get_frame(av_filter_sink_ctx, filter_frame.get()); + + if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) + break; + if (ret < 0) { + LOG_DEBUG(Service_NVDRV, "av_buffersink_get_frame error {}", ret); + return; + } + + av_frames.push(std::move(filter_frame)); + } + } + while (av_frames.size() > 10) { LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame"); av_frames.pop(); } diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h index 0d45fb7fe..06fe00a4b 100644 --- a/src/video_core/host1x/codecs/codec.h +++ b/src/video_core/host1x/codecs/codec.h @@ -15,6 +15,7 @@ extern "C" { #pragma GCC diagnostic ignored "-Wconversion" #endif #include <libavcodec/avcodec.h> +#include <libavfilter/avfilter.h> #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif @@ -61,17 +62,24 @@ public: private: void InitializeAvCodecContext(); + void InitializeAvFilters(AVFrame* frame); + void InitializeGpuDecoder(); bool CreateGpuAvDevice(); bool initialized{}; + bool filters_initialized{}; Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; const AVCodec* av_codec{nullptr}; AVCodecContext* av_codec_ctx{nullptr}; AVBufferRef* av_gpu_decoder{nullptr}; + AVFilterContext* av_filter_src_ctx{nullptr}; + AVFilterContext* av_filter_sink_ctx{nullptr}; + AVFilterGraph* av_filter_graph{nullptr}; + Host1x::Host1x& host1x; const Host1x::NvdecCommon::NvdecRegisters& state; std::unique_ptr<Decoder::H264> h264_decoder; diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index e8761a747..2d3f58201 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -5,6 +5,7 @@ #include "common/logging/log.h" #include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "video_core/renderer_base.h" namespace VideoCore { diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 8d20cbece..3e12a8813 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -9,7 +9,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" -#include "core/frontend/emu_window.h" +#include "core/frontend/framebuffer_layout.h" #include "video_core/gpu.h" #include "video_core/rasterizer_interface.h" @@ -89,6 +89,9 @@ public: void RequestScreenshot(void* data, std::function<void(bool)> callback, const Layout::FramebufferLayout& layout); + /// This is called to notify the rendering backend of a surface change + virtual void NotifySurfaceChanged() {} + protected: Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. std::unique_ptr<Core::Frontend::GraphicsContext> context; diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp index e2a189b63..be92cc2f4 100644 --- a/src/video_core/renderer_null/renderer_null.cpp +++ b/src/video_core/renderer_null/renderer_null.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "video_core/renderer_null/renderer_null.h" namespace Null { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 724e53edb..c419714d4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -119,7 +119,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, for (auto& stage_uniforms : fast_uniforms) { for (OGLBuffer& buffer : stage_uniforms) { buffer.Create(); - glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr, + glNamedBufferData(buffer.handle, VideoCommon::DEFAULT_SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); } } diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h index ca2bd8e8e..207a75d42 100644 --- a/src/video_core/renderer_opengl/gl_shader_context.h +++ b/src/video_core/renderer_opengl/gl_shader_context.h @@ -4,6 +4,7 @@ #pragma once #include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ad87f3e80..1c5dbcdd8 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -439,6 +439,11 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form return GL_R32UI; } +[[nodiscard]] bool IsAstcRecompressionEnabled() { + return Settings::values.astc_recompression.GetValue() != + Settings::AstcRecompression::Uncompressed; +} + [[nodiscard]] GLenum SelectAstcFormat(PixelFormat format, bool is_srgb) { switch (Settings::values.astc_recompression.GetValue()) { case Settings::AstcRecompression::Bc1: @@ -697,7 +702,7 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, gl_format = GL_RGBA; gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - if (IsPixelFormatASTC(info.format)) { + if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) { gl_internal_format = SelectAstcFormat(info.format, is_srgb); gl_format = GL_NONE; } @@ -1092,7 +1097,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI const bool is_srgb = IsPixelFormatSRGB(info.format); internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; - if (IsPixelFormatASTC(info.format)) { + if (IsPixelFormatASTC(info.format) && IsAstcRecompressionEnabled()) { internal_format = SelectAstcFormat(info.format, is_srgb); } } else { diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index b75d7220d..9a0b10568 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -347,6 +347,14 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device, VkFormat VertexFormat(const Device& device, Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { + if (device.MustEmulateScaledFormats()) { + if (type == Maxwell::VertexAttribute::Type::SScaled) { + type = Maxwell::VertexAttribute::Type::SInt; + } else if (type == Maxwell::VertexAttribute::Type::UScaled) { + type = Maxwell::VertexAttribute::Type::UInt; + } + } + const VkFormat format{([&]() { switch (type) { case Maxwell::VertexAttribute::Type::UnusedEnumDoNotUseBecauseItWillGoAway: diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 8e31eba34..77128c6e2 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -16,7 +16,7 @@ #include "common/settings.h" #include "common/telemetry.h" #include "core/core_timing.h" -#include "core/frontend/emu_window.h" +#include "core/frontend/graphics_context.h" #include "core/telemetry_session.h" #include "video_core/gpu.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -84,8 +84,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, std::unique_ptr<Core::Frontend::GraphicsContext> context_) try : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), - cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), - instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, + cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), + instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, Settings::values.renderer_debug.GetValue())), debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), surface(CreateSurface(instance, render_window.GetWindowInfo())), @@ -93,7 +93,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, state_tracker(), scheduler(device, state_tracker), swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, render_window.GetFramebufferLayout().height, false), - present_manager(render_window, device, memory_allocator, scheduler, swapchain), + present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, + surface), blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager, scheduler, screen_info), rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index f44367cb2..b2e8cbd1b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -54,6 +54,10 @@ public: return device.GetDriverName(); } + void NotifySurfaceChanged() override { + present_manager.NotifySurfaceChanged(); + } + private: void Report() const; @@ -63,7 +67,7 @@ private: Core::Memory::Memory& cpu_memory; Tegra::GPU& gpu; - Common::DynamicLibrary library; + std::shared_ptr<Common::DynamicLibrary> library; vk::InstanceDispatch dld; vk::Instance instance; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 1e0fdd3d9..7cdde992b 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -37,6 +37,10 @@ #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#ifdef ANDROID +extern u32 GetAndroidScreenRotation(); +#endif + namespace Vulkan { namespace { @@ -74,7 +78,48 @@ struct ScreenRectVertex { } }; -constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { +#ifdef ANDROID + +std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { + constexpr u32 ROTATION_0 = 0; + constexpr u32 ROTATION_90 = 1; + constexpr u32 ROTATION_180 = 2; + constexpr u32 ROTATION_270 = 3; + + // clang-format off + switch (GetAndroidScreenRotation()) { + case ROTATION_0: + // Desktop + return { 2.f / width, 0.f, 0.f, 0.f, + 0.f, 2.f / height, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + -1.f, -1.f, 0.f, 1.f}; + case ROTATION_180: + // Reverse desktop + return {-2.f / width, 0.f, 0.f, 0.f, + 0.f, -2.f / height, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + 1.f, 1.f, 0.f, 1.f}; + case ROTATION_270: + // Reverse landscape + return { 0.f, -2.f / width, 0.f, 0.f, + 2.f / height, 0.f, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + -1.f, 1.f, 0.f, 1.f}; + case ROTATION_90: + default: + // Landscape + return { 0.f, 2.f / width, 0.f, 0.f, + -2.f / height, 0.f, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + 1.f, -1.f, 0.f, 1.f}; + } + // clang-format on +} + +#else + +std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { // clang-format off return { 2.f / width, 0.f, 0.f, 0.f, 0.f, 2.f / height, 0.f, 0.f, @@ -83,6 +128,8 @@ constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { // clang-format on } +#endif + u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { using namespace VideoCore::Surface; return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); @@ -441,7 +488,12 @@ void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& f if (const std::size_t swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count || current_srgb != is_srgb) { current_srgb = is_srgb; +#ifdef ANDROID + // Android is already ordered the same as Switch. + image_view_format = current_srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM; +#else image_view_format = current_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; +#endif image_count = swapchain_images; Recreate(); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 9627eb129..daa128399 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -303,9 +303,13 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m DescriptorPool& descriptor_pool) : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, guest_descriptor_queue{guest_descriptor_queue_}, - uint8_pass(device, scheduler, descriptor_pool, staging_pool, compute_pass_descriptor_queue), quad_index_pass(device, scheduler, descriptor_pool, staging_pool, compute_pass_descriptor_queue) { + if (device.GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY) { + // TODO: FixMe: Uint8Pass compute shader does not build on some Qualcomm drivers. + uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool, + compute_pass_descriptor_queue); + } quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_, scheduler_, staging_pool_); quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_, @@ -442,7 +446,9 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat topology == PrimitiveTopology::QuadStrip); } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { vk_index_type = VK_INDEX_TYPE_UINT16; - std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); + if (uint8_pass) { + std::tie(vk_buffer, vk_offset) = uint8_pass->Assemble(num_indices, buffer, offset); + } } if (vk_buffer == VK_NULL_HANDLE) { // Vulkan doesn't support null index buffers. Replace it with our own null buffer. diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index b1f3c071f..92b4f7859 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -139,7 +139,7 @@ private: vk::Buffer null_buffer; MemoryCommit null_buffer_commit; - Uint8Pass uint8_pass; + std::unique_ptr<Uint8Pass> uint8_pass; QuadIndexedPass quad_index_pass; }; diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 8b65aeaeb..b128c4f6e 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -128,15 +128,12 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, const std::array signal_values{host_tick, u64(0)}; const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; - const u32 num_wait_semaphores = wait_semaphore ? 2 : 1; - const std::array wait_values{host_tick - 1, u64(1)}; - const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; - + const u32 num_wait_semaphores = wait_semaphore ? 1 : 0; const VkTimelineSemaphoreSubmitInfo timeline_si{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .pNext = nullptr, - .waitSemaphoreValueCount = num_wait_semaphores, - .pWaitSemaphoreValues = wait_values.data(), + .waitSemaphoreValueCount = 0, + .pWaitSemaphoreValues = nullptr, .signalSemaphoreValueCount = num_signal_semaphores, .pSignalSemaphoreValues = signal_values.data(), }; @@ -144,7 +141,7 @@ VkResult MasterSemaphore::SubmitQueueTimeline(vk::CommandBuffer& cmdbuf, .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &timeline_si, .waitSemaphoreCount = num_wait_semaphores, - .pWaitSemaphores = wait_semaphores.data(), + .pWaitSemaphores = &wait_semaphore, .pWaitDstStageMask = wait_stage_masks.data(), .commandBufferCount = 1, .pCommandBuffers = cmdbuf.address(), diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 66dfe5733..9482e91b0 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -114,14 +114,16 @@ Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribut return Shader::AttributeType::Disabled; case Maxwell::VertexAttribute::Type::SNorm: case Maxwell::VertexAttribute::Type::UNorm: - case Maxwell::VertexAttribute::Type::UScaled: - case Maxwell::VertexAttribute::Type::SScaled: case Maxwell::VertexAttribute::Type::Float: return Shader::AttributeType::Float; case Maxwell::VertexAttribute::Type::SInt: return Shader::AttributeType::SignedInt; case Maxwell::VertexAttribute::Type::UInt: return Shader::AttributeType::UnsignedInt; + case Maxwell::VertexAttribute::Type::UScaled: + return Shader::AttributeType::UnsignedScaled; + case Maxwell::VertexAttribute::Type::SScaled: + return Shader::AttributeType::SignedScaled; } return Shader::AttributeType::Float; } @@ -286,14 +288,17 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, - workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), + workers(device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY + ? 1 + : (std::max(std::thread::hardware_concurrency(), 2U) - 1), + "VkPipelineBuilder"), serialization_thread(1, "VkPipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverId driver_id{device.GetDriverID()}; profile = Shader::Profile{ .supported_spirv = device.SupportedSpirvVersion(), .unified_descriptor_binding = true, - .support_descriptor_aliasing = true, + .support_descriptor_aliasing = device.IsDescriptorAliasingSupported(), .support_int8 = device.IsInt8Supported(), .support_int16 = device.IsShaderInt16Supported(), .support_int64 = device.IsShaderInt64Supported(), @@ -324,6 +329,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .support_derivative_control = true, .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .support_native_ndc = device.IsExtDepthClipControlSupported(), + .support_scaled_attributes = !device.MustEmulateScaledFormats(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), @@ -341,7 +347,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .has_broken_signed_operations = false, .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, .ignore_nan_fp_comparisons = false, - }; + .has_broken_spirv_subgroup_mask_vector_extract_dynamic = + driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; host_info = Shader::HostTranslateInfo{ .support_float16 = device.IsFloat16Supported(), .support_int64 = device.IsShaderInt64Supported(), diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp index c49583013..10ace0420 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp @@ -4,10 +4,12 @@ #include "common/microprofile.h" #include "common/settings.h" #include "common/thread.h" +#include "core/frontend/emu_window.h" #include "video_core/renderer_vulkan/vk_present_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_surface.h" namespace Vulkan { @@ -92,14 +94,17 @@ bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat form } // Anonymous namespace -PresentManager::PresentManager(Core::Frontend::EmuWindow& render_window_, const Device& device_, +PresentManager::PresentManager(const vk::Instance& instance_, + Core::Frontend::EmuWindow& render_window_, const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_, - Swapchain& swapchain_) - : render_window{render_window_}, device{device_}, + Swapchain& swapchain_, vk::SurfaceKHR& surface_) + : instance{instance_}, render_window{render_window_}, device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_}, - blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())}, + surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(), + swapchain.GetImageViewFormat())}, use_present_thread{Settings::values.async_presentation.GetValue()}, - image_count{swapchain.GetImageCount()} { + image_count{swapchain.GetImageCount()}, last_render_surface{ + render_window_.GetWindowInfo().render_surface} { auto& dld = device.GetLogical(); cmdpool = dld.CreateCommandPool({ @@ -286,14 +291,45 @@ void PresentManager::PresentThread(std::stop_token token) { } } +void PresentManager::NotifySurfaceChanged() { +#ifdef ANDROID + std::scoped_lock lock{recreate_surface_mutex}; + recreate_surface_cv.notify_one(); +#endif +} + void PresentManager::CopyToSwapchain(Frame* frame) { MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); const auto recreate_swapchain = [&] { - swapchain.Create(frame->width, frame->height, frame->is_srgb); + swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb); image_count = swapchain.GetImageCount(); }; +#ifdef ANDROID + std::unique_lock lock{recreate_surface_mutex}; + + const auto needs_recreation = [&] { + if (last_render_surface != render_window.GetWindowInfo().render_surface) { + return true; + } + if (swapchain.NeedsRecreation(frame->is_srgb)) { + return true; + } + return false; + }; + + recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400), + [&]() { return !needs_recreation(); }); + + // If the frontend recreated the surface, recreate the renderer surface and swapchain. + if (last_render_surface != render_window.GetWindowInfo().render_surface) { + last_render_surface = render_window.GetWindowInfo().render_surface; + surface = CreateSurface(instance, render_window.GetWindowInfo()); + recreate_swapchain(); + } +#endif + // If the size or colorspace of the incoming frames has changed, recreate the swapchain // to account for that. const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb); @@ -436,7 +472,7 @@ void PresentManager::CopyToSwapchain(Frame* frame) { // Submit the image copy/blit to the swapchain { - std::scoped_lock lock{scheduler.submit_mutex}; + std::scoped_lock submit_lock{scheduler.submit_mutex}; switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) { case VK_SUCCESS: @@ -454,4 +490,4 @@ void PresentManager::CopyToSwapchain(Frame* frame) { swapchain.Present(render_semaphore); } -} // namespace Vulkan +} // namespace Vulkan
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h index 420a775e2..4ac2e2395 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.h +++ b/src/video_core/renderer_vulkan/vk_present_manager.h @@ -37,8 +37,9 @@ struct Frame { class PresentManager { public: - PresentManager(Core::Frontend::EmuWindow& render_window, const Device& device, - MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain); + PresentManager(const vk::Instance& instance, Core::Frontend::EmuWindow& render_window, + const Device& device, MemoryAllocator& memory_allocator, Scheduler& scheduler, + Swapchain& swapchain, vk::SurfaceKHR& surface); ~PresentManager(); /// Returns the last used presentation frame @@ -54,30 +55,38 @@ public: /// Waits for the present thread to finish presenting all queued frames. void WaitPresent(); + /// This is called to notify the rendering backend of a surface change + void NotifySurfaceChanged(); + private: void PresentThread(std::stop_token token); void CopyToSwapchain(Frame* frame); private: + const vk::Instance& instance; Core::Frontend::EmuWindow& render_window; const Device& device; MemoryAllocator& memory_allocator; Scheduler& scheduler; Swapchain& swapchain; + vk::SurfaceKHR& surface; vk::CommandPool cmdpool; std::vector<Frame> frames; std::queue<Frame*> present_queue; std::queue<Frame*> free_queue; std::condition_variable_any frame_cv; std::condition_variable free_cv; + std::condition_variable recreate_surface_cv; std::mutex swapchain_mutex; + std::mutex recreate_surface_mutex; std::mutex queue_mutex; std::mutex free_mutex; std::jthread present_thread; bool blit_supported; bool use_present_thread; - std::size_t image_count; + std::size_t image_count{}; + void* last_render_surface{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8d3a9736b..84e3a30cc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -188,7 +188,14 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { FlushWork(); gpu_memory->FlushCaching(); +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + query_cache.UpdateCounters(); + } +#else query_cache.UpdateCounters(); +#endif GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; if (!pipeline) { @@ -272,7 +279,14 @@ void RasterizerVulkan::DrawTexture() { SCOPE_EXIT({ gpu.TickWork(); }); FlushWork(); +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + query_cache.UpdateCounters(); + } +#else query_cache.UpdateCounters(); +#endif texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.UpdateRenderTargets(false); @@ -743,7 +757,11 @@ void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_load } void RasterizerVulkan::FlushWork() { +#ifdef ANDROID + static constexpr u32 DRAWS_TO_DISPATCH = 1024; +#else static constexpr u32 DRAWS_TO_DISPATCH = 4096; +#endif // ANDROID // Only check multiples of 8 draws static_assert(DRAWS_TO_DISPATCH % 8 == 0); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 80455ec08..17ef61147 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -239,7 +239,14 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se void Scheduler::AllocateNewContext() { // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + query_cache->UpdateCounters(); + } +#else query_cache->UpdateCounters(); +#endif } } @@ -250,7 +257,14 @@ void Scheduler::InvalidateState() { } void Scheduler::EndPendingOperations() { +#if ANDROID + if (Settings::IsGPULevelHigh()) { + // This is problematic on Android, disable on GPU Normal. + query_cache->DisableStreams(); + } +#else query_cache->DisableStreams(); +#endif EndRenderPass(); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 8c0dec590..afcf34fba 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -107,16 +107,17 @@ VkCompositeAlphaFlagBitsKHR ChooseAlphaFlags(const VkSurfaceCapabilitiesKHR& cap Swapchain::Swapchain(VkSurfaceKHR surface_, const Device& device_, Scheduler& scheduler_, u32 width_, u32 height_, bool srgb) : surface{surface_}, device{device_}, scheduler{scheduler_} { - Create(width_, height_, srgb); + Create(surface_, width_, height_, srgb); } Swapchain::~Swapchain() = default; -void Swapchain::Create(u32 width_, u32 height_, bool srgb) { +void Swapchain::Create(VkSurfaceKHR surface_, u32 width_, u32 height_, bool srgb) { is_outdated = false; is_suboptimal = false; width = width_; height = height_; + surface = surface_; const auto physical_device = device.GetPhysical(); const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; @@ -266,7 +267,12 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo images = swapchain.GetImages(); image_count = static_cast<u32>(images.size()); +#ifdef ANDROID + // Android is already ordered the same as Switch. + image_view_format = srgb ? VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM; +#else image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; +#endif } void Swapchain::CreateSemaphores() { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index bf1ea7254..b8a1465a6 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -24,7 +24,7 @@ public: ~Swapchain(); /// Creates (or recreates) the swapchain with a given size. - void Create(u32 width, u32 height, bool srgb); + void Create(VkSurfaceKHR surface, u32 width, u32 height, bool srgb); /// Acquires the next image in the swapchain, waits as needed. bool AcquireNextImage(); @@ -118,7 +118,7 @@ private: bool NeedsPresentModeUpdate() const; - const VkSurfaceKHR surface; + VkSurfaceKHR surface; const Device& device; Scheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp index db04943eb..a802d3c49 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp @@ -1,6 +1,10 @@ // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) +#include <adrenotools/driver.h> +#endif + #include "common/literals.h" #include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -13,7 +17,10 @@ namespace Vulkan { using namespace Common::Literals; TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) - : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} { +#ifndef ANDROID + : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} +#endif +{ { std::scoped_lock lk{m_submission_lock}; m_submission_time = std::chrono::steady_clock::now(); @@ -30,6 +37,7 @@ void TurboMode::QueueSubmitted() { } void TurboMode::Run(std::stop_token stop_token) { +#ifndef ANDROID auto& dld = m_device.GetLogical(); // Allocate buffer. 2MiB should be sufficient. @@ -142,8 +150,14 @@ void TurboMode::Run(std::stop_token stop_token) { // Create a single command buffer. auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY); auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()}; +#endif while (!stop_token.stop_requested()) { +#ifdef ANDROID +#ifdef ARCHITECTURE_arm64 + adrenotools_set_turbo(true); +#endif +#else // Reset the fence. fence.Reset(); @@ -209,7 +223,7 @@ void TurboMode::Run(std::stop_token stop_token) { // Wait for completion. fence.Wait(); - +#endif // Wait for the next graphics queue submission if necessary. std::unique_lock lk{m_submission_lock}; Common::CondvarWait(m_submission_cv, lk, stop_token, [this] { @@ -217,6 +231,9 @@ void TurboMode::Run(std::stop_token stop_token) { std::chrono::milliseconds{100}; }); } +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) + adrenotools_set_turbo(false); +#endif } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h index 99b5ac50b..9341c9867 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.h +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h @@ -23,8 +23,10 @@ public: private: void Run(std::stop_token stop_token); +#ifndef ANDROID Device m_device; MemoryAllocator m_allocator; +#endif std::mutex m_submission_lock; std::condition_variable_any m_submission_cv; std::chrono::time_point<std::chrono::steady_clock> m_submission_time{}; diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 310fb551a..e77b576ec 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -31,7 +31,7 @@ struct DescriptorUpdateEntry { class UpdateDescriptorQueue final { // This should be plenty for the vast majority of cases. Most desktop platforms only // provide up to 3 swapchain images. - static constexpr size_t FRAMES_IN_FLIGHT = 5; + static constexpr size_t FRAMES_IN_FLIGHT = 7; static constexpr size_t FRAME_PAYLOAD_SIZE = 0x20000; static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2cf082c5d..c7f7448e9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -850,15 +850,11 @@ void TextureCache<P>::PopAsyncFlushes() { template <class P> ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload) { const ImageInfo dst_info(operand); - const ImageId dst_id = FindDMAImage(dst_info, operand.address); - if (!dst_id) { - return NULL_IMAGE_ID; - } - auto& image = slot_images[dst_id]; - if (False(image.flags & ImageFlagBits::GpuModified)) { - // No need to waste time on an image that's synced with guest + const ImageId image_id = FindDMAImage(dst_info, operand.address); + if (!image_id) { return NULL_IMAGE_ID; } + auto& image = slot_images[image_id]; if (!is_upload && !image.info.dma_downloaded) { // Force a full sync. image.info.dma_downloaded = true; @@ -868,7 +864,7 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo if (!base) { return NULL_IMAGE_ID; } - return dst_id; + return image_id; } template <class P> diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp index 10a001b8f..9de484c29 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp +++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp @@ -13,11 +13,39 @@ VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, [[maybe_unused]] void* user_data) { // Skip logging known false-positive validation errors switch (static_cast<u32>(data->messageIdNumber)) { +#ifdef ANDROID + case 0xbf9cf353u: // VUID-vkCmdBindVertexBuffers2-pBuffers-04111 + // The below are due to incorrect reporting of extendedDynamicState + case 0x1093bebbu: // VUID-vkCmdSetCullMode-None-03384 + case 0x9215850fu: // VUID-vkCmdSetDepthTestEnable-None-03352 + case 0x86bf18dcu: // VUID-vkCmdSetDepthWriteEnable-None-03354 + case 0x0792ad08u: // VUID-vkCmdSetStencilOp-None-03351 + case 0x93e1ba4eu: // VUID-vkCmdSetFrontFace-None-03383 + case 0xac9c13c5u: // VUID-vkCmdSetStencilTestEnable-None-03350 + case 0xc9a2001bu: // VUID-vkCmdSetDepthBoundsTestEnable-None-03349 + case 0x8b7159a7u: // VUID-vkCmdSetDepthCompareOp-None-03353 + // The below are due to incorrect reporting of extendedDynamicState2 + case 0xb13c8036u: // VUID-vkCmdSetDepthBiasEnable-None-04872 + case 0xdff2e5c1u: // VUID-vkCmdSetRasterizerDiscardEnable-None-04871 + case 0x0cc85f41u: // VUID-vkCmdSetPrimitiveRestartEnable-None-04866 + case 0x01257b492: // VUID-vkCmdSetLogicOpEXT-None-0486 + // The below are due to incorrect reporting of vertexInputDynamicState + case 0x398e0dabu: // VUID-vkCmdSetVertexInputEXT-None-04790 + // The below are due to incorrect reporting of extendedDynamicState3 + case 0x970c11a5u: // VUID-vkCmdSetColorWriteMaskEXT-extendedDynamicState3ColorWriteMask-07364 + case 0x6b453f78u: // VUID-vkCmdSetColorBlendEnableEXT-extendedDynamicState3ColorBlendEnable-07355 + case 0xf66469d0u: // VUID-vkCmdSetColorBlendEquationEXT-extendedDynamicState3ColorBlendEquation-07356 + case 0x1d43405eu: // VUID-vkCmdSetLogicOpEnableEXT-extendedDynamicState3LogicOpEnable-07365 + case 0x638462e8u: // VUID-vkCmdSetDepthClampEnableEXT-extendedDynamicState3DepthClampEnable-07448 + // Misc + case 0xe0a2da61u: // VUID-vkCmdDrawIndexed-format-07753 +#else case 0x682a878au: // VUID-vkCmdBindVertexBuffers2EXT-pBuffers-parameter case 0x99fb7dfdu: // UNASSIGNED-RequiredParameter (vkCmdBindVertexBuffers2EXT pBuffers[0]) case 0xe8616bf2u: // Bound VkDescriptorSet 0x0[] was destroyed. Likely push_descriptor related case 0x1608dec0u: // Image layout in vkUpdateDescriptorSet doesn't match descriptor use case 0x55362756u: // Descriptor binding and framebuffer attachment overlap +#endif return VK_FALSE; default: break; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index aea677cb3..0158b6b0d 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -18,6 +18,10 @@ #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) +#include <adrenotools/bcenabler.h> +#endif + namespace Vulkan { using namespace Common::Literals; namespace { @@ -262,6 +266,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica return format_properties; } +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) +void OverrideBcnFormats(std::unordered_map<VkFormat, VkFormatProperties>& format_properties) { + // These properties are extracted from Adreno driver 512.687.0 + constexpr VkFormatFeatureFlags tiling_features{ + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT}; + + constexpr VkFormatFeatureFlags buffer_features{VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT}; + + static constexpr std::array bcn_formats{ + VK_FORMAT_BC1_RGBA_SRGB_BLOCK, VK_FORMAT_BC1_RGBA_UNORM_BLOCK, VK_FORMAT_BC2_SRGB_BLOCK, + VK_FORMAT_BC2_UNORM_BLOCK, VK_FORMAT_BC3_SRGB_BLOCK, VK_FORMAT_BC3_UNORM_BLOCK, + VK_FORMAT_BC4_SNORM_BLOCK, VK_FORMAT_BC4_UNORM_BLOCK, VK_FORMAT_BC5_SNORM_BLOCK, + VK_FORMAT_BC5_UNORM_BLOCK, VK_FORMAT_BC6H_SFLOAT_BLOCK, VK_FORMAT_BC6H_UFLOAT_BLOCK, + VK_FORMAT_BC7_SRGB_BLOCK, VK_FORMAT_BC7_UNORM_BLOCK, + }; + + for (const auto format : bcn_formats) { + format_properties[format].linearTilingFeatures = tiling_features; + format_properties[format].optimalTilingFeatures = tiling_features; + format_properties[format].bufferFeatures = buffer_features; + } +} +#endif + NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, const std::set<std::string, std::less<>>& exts) { if (exts.contains(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) { @@ -302,6 +332,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const bool is_suitable = GetSuitability(surface != nullptr); const VkDriverId driver_id = properties.driver.driverID; + const auto device_id = properties.properties.deviceID; const bool is_radv = driver_id == VK_DRIVER_ID_MESA_RADV; const bool is_amd_driver = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE; @@ -310,9 +341,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR const bool is_intel_anv = driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; const bool is_nvidia = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY; const bool is_mvk = driver_id == VK_DRIVER_ID_MOLTENVK; + const bool is_qualcomm = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY; + const bool is_turnip = driver_id == VK_DRIVER_ID_MESA_TURNIP; + const bool is_s8gen2 = device_id == 0x43050a01; - if (is_mvk && !is_suitable) { - LOG_WARNING(Render_Vulkan, "Unsuitable driver is MoltenVK, continuing anyway"); + if ((is_mvk || is_qualcomm || is_turnip) && !is_suitable) { + LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway"); } else if (!is_suitable) { throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); } @@ -355,6 +389,59 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR CollectPhysicalMemoryInfo(); CollectToolingInfo(); +#ifdef ANDROID + if (is_qualcomm || is_turnip) { + LOG_WARNING(Render_Vulkan, + "Qualcomm and Turnip drivers have broken VK_EXT_custom_border_color"); + extensions.custom_border_color = false; + loaded_extensions.erase(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); + } + + if (is_qualcomm) { + must_emulate_scaled_formats = true; + + LOG_WARNING(Render_Vulkan, "Qualcomm drivers have broken VK_EXT_extended_dynamic_state"); + extensions.extended_dynamic_state = false; + loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); + + LOG_WARNING(Render_Vulkan, + "Qualcomm drivers have a slow VK_KHR_push_descriptor implementation"); + extensions.push_descriptor = false; + loaded_extensions.erase(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); + +#ifdef ARCHITECTURE_arm64 + // Patch the driver to enable BCn textures. + const auto major = (properties.properties.driverVersion >> 24) << 2; + const auto minor = (properties.properties.driverVersion >> 12) & 0xFFFU; + const auto vendor = properties.properties.vendorID; + const auto patch_status = adrenotools_get_bcn_type(major, minor, vendor); + + if (patch_status == ADRENOTOOLS_BCN_PATCH) { + LOG_INFO(Render_Vulkan, "Patching Adreno driver to support BCn texture formats"); + if (adrenotools_patch_bcn( + reinterpret_cast<void*>(dld.vkGetPhysicalDeviceFormatProperties))) { + OverrideBcnFormats(format_properties); + } else { + LOG_ERROR(Render_Vulkan, "Patch failed! Driver code may now crash"); + } + } else if (patch_status == ADRENOTOOLS_BCN_BLOB) { + LOG_INFO(Render_Vulkan, "Adreno driver supports BCn textures without patches"); + } else { + LOG_WARNING(Render_Vulkan, "Adreno driver can't be patched to enable BCn textures"); + } +#endif // ARCHITECTURE_arm64 + } + + const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY; + if (is_arm) { + must_emulate_scaled_formats = true; + + LOG_WARNING(Render_Vulkan, "ARM drivers have broken VK_EXT_extended_dynamic_state"); + extensions.extended_dynamic_state = false; + loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); + } +#endif // ANDROID + if (is_nvidia) { const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; const auto arch = GetNvidiaArchitecture(physical, supported_extensions); @@ -388,7 +475,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR loaded_extensions.erase(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); } } - if (extensions.extended_dynamic_state2 && is_radv) { + if (extensions.extended_dynamic_state2 && (is_radv || is_qualcomm)) { const u32 version = (properties.properties.driverVersion << 3) >> 3; if (version < VK_MAKE_API_VERSION(0, 22, 3, 1)) { LOG_WARNING( @@ -415,7 +502,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR dynamic_state3_enables = false; } } - if (extensions.vertex_input_dynamic_state && is_radv) { + if (extensions.vertex_input_dynamic_state && (is_radv || is_qualcomm)) { + // Qualcomm S8gen2 drivers do not properly support vertex_input_dynamic_state. // TODO(ameerj): Blacklist only offending driver versions // TODO(ameerj): Confirm if RDNA1 is affected const bool is_rdna2 = @@ -467,8 +555,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); cant_blit_msaa = true; } - if (is_intel_anv) { - LOG_WARNING(Render_Vulkan, "ANV driver does not support native BGR format"); + if (is_intel_anv || (is_qualcomm && !is_s8gen2)) { + LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format"); must_emulate_bgr565 = true; } if (extensions.push_descriptor && is_intel_anv) { @@ -633,7 +721,8 @@ bool Device::ShouldBoostClocks() const { driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE || driver_id == VK_DRIVER_ID_MESA_RADV || driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY || driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || - driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA; + driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA || + driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP; const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 5f1c63ff9..b692b4be4 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -295,6 +295,11 @@ public: return features.features.textureCompressionASTC_LDR; } + /// Returns true if descriptor aliasing is natively supported. + bool IsDescriptorAliasingSupported() const { + return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY; + } + /// Returns true if the device supports float16 natively. bool IsFloat16Supported() const { return features.shader_float16_int8.shaderFloat16; @@ -495,6 +500,10 @@ public: } bool HasTimelineSemaphore() const { + if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) { + // Timeline semaphores do not work properly on all Qualcomm drivers. + return false; + } return features.timeline_semaphore.timelineSemaphore; } @@ -551,6 +560,10 @@ public: return cant_blit_msaa; } + bool MustEmulateScaledFormats() const { + return must_emulate_scaled_formats; + } + bool MustEmulateBGR565() const { return must_emulate_bgr565; } @@ -666,6 +679,7 @@ private: bool has_nsight_graphics{}; ///< Has Nsight Graphics attached bool supports_d24_depth{}; ///< Supports D24 depth buffers. bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. + bool must_emulate_scaled_formats{}; ///< Requires scaled vertex format emulation bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. diff --git a/src/video_core/vulkan_common/vulkan_library.cpp b/src/video_core/vulkan_common/vulkan_library.cpp index 4eb3913ee..47f6f2a03 100644 --- a/src/video_core/vulkan_common/vulkan_library.cpp +++ b/src/video_core/vulkan_common/vulkan_library.cpp @@ -10,29 +10,35 @@ namespace Vulkan { -Common::DynamicLibrary OpenLibrary() { +std::shared_ptr<Common::DynamicLibrary> OpenLibrary( + [[maybe_unused]] Core::Frontend::GraphicsContext* context) { LOG_DEBUG(Render_Vulkan, "Looking for a Vulkan library"); - Common::DynamicLibrary library; +#if defined(ANDROID) && defined(ARCHITECTURE_arm64) + // Android manages its Vulkan driver from the frontend. + return context->GetDriverLibrary(); +#else + auto library = std::make_shared<Common::DynamicLibrary>(); #ifdef __APPLE__ // Check if a path to a specific Vulkan library has been specified. char* const libvulkan_env = std::getenv("LIBVULKAN_PATH"); - if (!libvulkan_env || !library.Open(libvulkan_env)) { + if (!libvulkan_env || !library->Open(libvulkan_env)) { // Use the libvulkan.dylib from the application bundle. const auto filename = Common::FS::GetBundleDirectory() / "Contents/Frameworks/libvulkan.dylib"; - void(library.Open(Common::FS::PathToUTF8String(filename).c_str())); + void(library->Open(Common::FS::PathToUTF8String(filename).c_str())); } #else std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); LOG_DEBUG(Render_Vulkan, "Trying Vulkan library: {}", filename); - if (!library.Open(filename.c_str())) { + if (!library->Open(filename.c_str())) { // Android devices may not have libvulkan.so.1, only libvulkan.so. filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); LOG_DEBUG(Render_Vulkan, "Trying Vulkan library (second attempt): {}", filename); - void(library.Open(filename.c_str())); + void(library->Open(filename.c_str())); } #endif return library; +#endif } } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_library.h b/src/video_core/vulkan_common/vulkan_library.h index 364ca979b..e1734525e 100644 --- a/src/video_core/vulkan_common/vulkan_library.h +++ b/src/video_core/vulkan_common/vulkan_library.h @@ -3,10 +3,14 @@ #pragma once +#include <memory> + #include "common/dynamic_library.h" +#include "core/frontend/graphics_context.h" namespace Vulkan { -Common::DynamicLibrary OpenLibrary(); +std::shared_ptr<Common::DynamicLibrary> OpenLibrary( + [[maybe_unused]] Core::Frontend::GraphicsContext* context = nullptr); } // namespace Vulkan |
