diff options
Diffstat (limited to 'src/video_core')
55 files changed, 668 insertions, 708 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 16c905db9..2de2beb6e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -18,6 +18,7 @@ add_library(video_core STATIC buffer_cache/usage_tracker.h buffer_cache/word_manager.h cache_types.h + capture.h cdma_pusher.cpp cdma_pusher.h compatible_formats.cpp @@ -101,6 +102,7 @@ add_library(video_core STATIC memory_manager.cpp memory_manager.h precompiled_headers.h + present.h pte_kind.h query_cache/bank_base.h query_cache/query_base.h @@ -274,7 +276,6 @@ add_library(video_core STATIC texture_cache/image_view_info.h texture_cache/render_targets.h texture_cache/samples_helper.h - texture_cache/slot_vector.h texture_cache/texture_cache.cpp texture_cache/texture_cache.h texture_cache/texture_cache_base.h diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b4bf369d1..6d3d933c5 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -7,6 +7,7 @@ #include <memory> #include <numeric> +#include "common/range_sets.inc" #include "video_core/buffer_cache/buffer_cache_base.h" #include "video_core/guest_memory.h" #include "video_core/host1x/gpu_device_memory_manager.h" @@ -20,7 +21,7 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); - common_ranges.clear(); + gpu_modified_ranges.Clear(); inline_buffer_id = NULL_BUFFER_ID; if (!runtime.CanReportMemoryUsage()) { @@ -44,6 +45,9 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R } template <class P> +BufferCache<P>::~BufferCache() = default; + +template <class P> void BufferCache<P>::RunGarbageCollector() { const bool aggressive_gc = total_used_memory >= critical_memory; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; @@ -96,20 +100,17 @@ void BufferCache<P>::TickFrame() { ++frame_tick; delayed_destruction_ring.Tick(); - if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - for (auto& buffer : async_buffers_death_ring) { - runtime.FreeDeferredStagingBuffer(buffer); - } - async_buffers_death_ring.clear(); + for (auto& buffer : async_buffers_death_ring) { + runtime.FreeDeferredStagingBuffer(buffer); } + async_buffers_death_ring.clear(); } template <class P> void BufferCache<P>::WriteMemory(DAddr device_addr, u64 size) { if (memory_tracker.IsRegionGpuModified(device_addr, size)) { - const IntervalType subtract_interval{device_addr, device_addr + size}; - ClearDownload(subtract_interval); - common_ranges.subtract(subtract_interval); + ClearDownload(device_addr, size); + gpu_modified_ranges.Subtract(device_addr, size); } memory_tracker.MarkRegionAsCpuModified(device_addr, size); } @@ -174,11 +175,11 @@ void BufferCache<P>::DownloadMemory(DAddr device_addr, u64 size) { } template <class P> -void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { - RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024); - uncommitted_ranges.subtract(subtract_interval); - for (auto& interval_set : committed_ranges) { - interval_set.subtract(subtract_interval); +void BufferCache<P>::ClearDownload(DAddr device_addr, u64 size) { + async_downloads.DeleteAll(device_addr, size); + uncommitted_gpu_modified_ranges.Subtract(device_addr, size); + for (auto& interval_set : committed_gpu_modified_ranges) { + interval_set.Subtract(device_addr, size); } } @@ -195,8 +196,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am return false; } - const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; - ClearDownload(subtract_interval); + ClearDownload(*cpu_dest_address, amount); BufferId buffer_a; BufferId buffer_b; @@ -215,21 +215,20 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am .size = amount, }}; - boost::container::small_vector<IntervalType, 4> tmp_intervals; + boost::container::small_vector<std::pair<DAddr, size_t>, 4> tmp_intervals; auto mirror = [&](DAddr base_address, DAddr base_address_end) { const u64 size = base_address_end - base_address; const DAddr diff = base_address - *cpu_src_address; const DAddr new_base_address = *cpu_dest_address + diff; - const IntervalType add_interval{new_base_address, new_base_address + size}; - tmp_intervals.push_back(add_interval); - uncommitted_ranges.add(add_interval); + tmp_intervals.push_back({new_base_address, size}); + uncommitted_gpu_modified_ranges.Add(new_base_address, size); }; - ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror); + gpu_modified_ranges.ForEachInRange(*cpu_src_address, amount, mirror); // This subtraction in this order is important for overlapping copies. - common_ranges.subtract(subtract_interval); + gpu_modified_ranges.Subtract(*cpu_dest_address, amount); const bool has_new_downloads = tmp_intervals.size() != 0; - for (const IntervalType& add_interval : tmp_intervals) { - common_ranges.add(add_interval); + for (const auto& pair : tmp_intervals) { + gpu_modified_ranges.Add(pair.first, pair.second); } const auto& copy = copies[0]; src_buffer.MarkUsage(copy.src_offset, copy.size); @@ -257,9 +256,8 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { } const size_t size = amount * sizeof(u32); - const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + size}; - ClearDownload(subtract_interval); - common_ranges.subtract(subtract_interval); + ClearDownload(*cpu_dst_address, size); + gpu_modified_ranges.Subtract(*cpu_dst_address, size); const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size)); Buffer& dest_buffer = slot_buffers[buffer]; @@ -300,11 +298,11 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( MarkWrittenBuffer(buffer_id, device_addr, size); break; case ObtainBufferOperation::DiscardWrite: { - DAddr device_addr_start = Common::AlignDown(device_addr, 64); - DAddr device_addr_end = Common::AlignUp(device_addr + size, 64); - IntervalType interval{device_addr_start, device_addr_end}; - ClearDownload(interval); - common_ranges.subtract(interval); + const DAddr device_addr_start = Common::AlignDown(device_addr, 64); + const DAddr device_addr_end = Common::AlignUp(device_addr + size, 64); + const size_t new_size = device_addr_end - device_addr_start; + ClearDownload(device_addr_start, new_size); + gpu_modified_ranges.Subtract(device_addr_start, new_size); break; } default: @@ -504,46 +502,40 @@ void BufferCache<P>::FlushCachedWrites() { template <class P> bool BufferCache<P>::HasUncommittedFlushes() const noexcept { - return !uncommitted_ranges.empty() || !committed_ranges.empty(); + return !uncommitted_gpu_modified_ranges.Empty() || !committed_gpu_modified_ranges.empty(); } template <class P> void BufferCache<P>::AccumulateFlushes() { - if (uncommitted_ranges.empty()) { + if (uncommitted_gpu_modified_ranges.Empty()) { return; } - committed_ranges.emplace_back(std::move(uncommitted_ranges)); + committed_gpu_modified_ranges.emplace_back(std::move(uncommitted_gpu_modified_ranges)); } template <class P> bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { - if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - return (!async_buffers.empty() && async_buffers.front().has_value()); - } else { - return false; - } + return (!async_buffers.empty() && async_buffers.front().has_value()); } template <class P> void BufferCache<P>::CommitAsyncFlushesHigh() { AccumulateFlushes(); - if (committed_ranges.empty()) { - if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - async_buffers.emplace_back(std::optional<Async_Buffer>{}); - } + if (committed_gpu_modified_ranges.empty()) { + async_buffers.emplace_back(std::optional<Async_Buffer>{}); return; } MICROPROFILE_SCOPE(GPU_DownloadMemory); - auto it = committed_ranges.begin(); - while (it != committed_ranges.end()) { + auto it = committed_gpu_modified_ranges.begin(); + while (it != committed_gpu_modified_ranges.end()) { auto& current_intervals = *it; auto next_it = std::next(it); - while (next_it != committed_ranges.end()) { - for (auto& interval : *next_it) { - current_intervals.subtract(interval); - } + while (next_it != committed_gpu_modified_ranges.end()) { + next_it->ForEach([¤t_intervals](DAddr start, DAddr end) { + current_intervals.Subtract(start, end - start); + }); next_it++; } it++; @@ -552,10 +544,10 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads; u64 total_size_bytes = 0; u64 largest_copy = 0; - for (const IntervalSet& intervals : committed_ranges) { - for (auto& interval : intervals) { - const std::size_t size = interval.upper() - interval.lower(); - const DAddr device_addr = interval.lower(); + for (const Common::RangeSet<DAddr>& range_set : committed_gpu_modified_ranges) { + range_set.ForEach([&](DAddr interval_lower, DAddr interval_upper) { + const std::size_t size = interval_upper - interval_lower; + const DAddr device_addr = interval_lower; ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) { const DAddr buffer_start = buffer.CpuAddr(); const DAddr buffer_end = buffer_start + buffer.SizeBytes(); @@ -583,77 +575,35 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { largest_copy = std::max(largest_copy, new_size); }; - ForEachInRangeSet(common_ranges, device_addr_out, range_size, add_download); + gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, + add_download); }); }); - } + }); } - committed_ranges.clear(); + committed_gpu_modified_ranges.clear(); if (downloads.empty()) { - if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - async_buffers.emplace_back(std::optional<Async_Buffer>{}); - } + async_buffers.emplace_back(std::optional<Async_Buffer>{}); return; } - if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); - boost::container::small_vector<BufferCopy, 4> normalized_copies; - IntervalSet new_async_range{}; - runtime.PreCopyBarrier(); - for (auto& [copy, buffer_id] : downloads) { - copy.dst_offset += download_staging.offset; - const std::array copies{copy}; - BufferCopy second_copy{copy}; - Buffer& buffer = slot_buffers[buffer_id]; - second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; - DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset); - const IntervalType base_interval{orig_device_addr, orig_device_addr + copy.size}; - async_downloads += std::make_pair(base_interval, 1); - buffer.MarkUsage(copy.src_offset, copy.size); - runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); - normalized_copies.push_back(second_copy); - } - runtime.PostCopyBarrier(); - pending_downloads.emplace_back(std::move(normalized_copies)); - async_buffers.emplace_back(download_staging); - } else { - if (!Settings::IsGPULevelHigh()) { - committed_ranges.clear(); - uncommitted_ranges.clear(); - } else { - if constexpr (USE_MEMORY_MAPS) { - auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); - runtime.PreCopyBarrier(); - for (auto& [copy, buffer_id] : downloads) { - // Have in mind the staging buffer offset for the copy - copy.dst_offset += download_staging.offset; - const std::array copies{copy}; - Buffer& buffer = slot_buffers[buffer_id]; - buffer.MarkUsage(copy.src_offset, copy.size); - runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); - } - runtime.PostCopyBarrier(); - runtime.Finish(); - for (const auto& [copy, buffer_id] : downloads) { - const Buffer& buffer = slot_buffers[buffer_id]; - const DAddr device_addr = buffer.CpuAddr() + copy.src_offset; - // Undo the modified offset - const u64 dst_offset = copy.dst_offset - download_staging.offset; - const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; - device_memory.WriteBlockUnsafe(device_addr, read_mapped_memory, copy.size); - } - } else { - const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); - for (const auto& [copy, buffer_id] : downloads) { - Buffer& buffer = slot_buffers[buffer_id]; - buffer.ImmediateDownload(copy.src_offset, - immediate_buffer.subspan(0, copy.size)); - const DAddr device_addr = buffer.CpuAddr() + copy.src_offset; - device_memory.WriteBlockUnsafe(device_addr, immediate_buffer.data(), copy.size); - } - } - } + auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); + boost::container::small_vector<BufferCopy, 4> normalized_copies; + runtime.PreCopyBarrier(); + for (auto& [copy, buffer_id] : downloads) { + copy.dst_offset += download_staging.offset; + const std::array copies{copy}; + BufferCopy second_copy{copy}; + Buffer& buffer = slot_buffers[buffer_id]; + second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; + const DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset); + async_downloads.Add(orig_device_addr, copy.size); + buffer.MarkUsage(copy.src_offset, copy.size); + runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); + normalized_copies.push_back(second_copy); } + runtime.PostCopyBarrier(); + pending_downloads.emplace_back(std::move(normalized_copies)); + async_buffers.emplace_back(download_staging); } template <class P> @@ -676,37 +626,31 @@ void BufferCache<P>::PopAsyncBuffers() { async_buffers.pop_front(); return; } - if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - auto& downloads = pending_downloads.front(); - auto& async_buffer = async_buffers.front(); - u8* base = async_buffer->mapped_span.data(); - const size_t base_offset = async_buffer->offset; - for (const auto& copy : downloads) { - const DAddr device_addr = static_cast<DAddr>(copy.src_offset); - const u64 dst_offset = copy.dst_offset - base_offset; - const u8* read_mapped_memory = base + dst_offset; - ForEachInOverlapCounter( - async_downloads, device_addr, copy.size, [&](DAddr start, DAddr end, int count) { - device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr], - end - start); - if (count == 1) { - const IntervalType base_interval{start, end}; - common_ranges.subtract(base_interval); - } - }); - const IntervalType subtract_interval{device_addr, device_addr + copy.size}; - RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); - } - async_buffers_death_ring.emplace_back(*async_buffer); - async_buffers.pop_front(); - pending_downloads.pop_front(); + auto& downloads = pending_downloads.front(); + auto& async_buffer = async_buffers.front(); + u8* base = async_buffer->mapped_span.data(); + const size_t base_offset = async_buffer->offset; + for (const auto& copy : downloads) { + const DAddr device_addr = static_cast<DAddr>(copy.src_offset); + const u64 dst_offset = copy.dst_offset - base_offset; + const u8* read_mapped_memory = base + dst_offset; + async_downloads.ForEachInRange(device_addr, copy.size, [&](DAddr start, DAddr end, s32) { + device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr], + end - start); + }); + async_downloads.Subtract(device_addr, copy.size, [&](DAddr start, DAddr end) { + gpu_modified_ranges.Subtract(start, end - start); + }); } + async_buffers_death_ring.emplace_back(*async_buffer); + async_buffers.pop_front(); + pending_downloads.pop_front(); } template <class P> bool BufferCache<P>::IsRegionGpuModified(DAddr addr, size_t size) { bool is_dirty = false; - ForEachInRangeSet(common_ranges, addr, size, [&](DAddr, DAddr) { is_dirty = true; }); + gpu_modified_ranges.ForEachInRange(addr, size, [&](DAddr, DAddr) { is_dirty = true; }); return is_dirty; } @@ -1320,10 +1264,8 @@ void BufferCache<P>::UpdateComputeTextureBuffers() { template <class P> void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size) { memory_tracker.MarkRegionAsGpuModified(device_addr, size); - - const IntervalType base_interval{device_addr, device_addr + size}; - common_ranges.add(base_interval); - uncommitted_ranges.add(base_interval); + gpu_modified_ranges.Add(device_addr, size); + uncommitted_gpu_modified_ranges.Add(device_addr, size); } template <class P> @@ -1600,9 +1542,8 @@ bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size, template <class P> void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer) { - const IntervalType subtract_interval{dest_address, dest_address + copy_size}; - ClearDownload(subtract_interval); - common_ranges.subtract(subtract_interval); + ClearDownload(dest_address, copy_size); + gpu_modified_ranges.Subtract(dest_address, copy_size); BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size)); auto& buffer = slot_buffers[buffer_id]; @@ -1652,12 +1593,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64 largest_copy = std::max(largest_copy, new_size); }; - const DAddr start_address = device_addr_out; - const DAddr end_address = start_address + range_size; - ForEachInRangeSet(common_ranges, start_address, range_size, add_download); - const IntervalType subtract_interval{start_address, end_address}; - ClearDownload(subtract_interval); - common_ranges.subtract(subtract_interval); + gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download); + ClearDownload(device_addr_out, range_size); + gpu_modified_ranges.Subtract(device_addr_out, range_size); }); if (total_size_bytes == 0) { return; diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 80dbb81e7..240e9f015 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -13,25 +13,15 @@ #include <unordered_map> #include <vector> -#include <boost/container/small_vector.hpp> -#define BOOST_NO_MT -#include <boost/pool/detail/mutex.hpp> -#undef BOOST_NO_MT -#include <boost/icl/interval.hpp> -#include <boost/icl/interval_base_set.hpp> -#include <boost/icl/interval_set.hpp> -#include <boost/icl/split_interval_map.hpp> -#include <boost/pool/pool.hpp> -#include <boost/pool/pool_alloc.hpp> -#include <boost/pool/poolfwd.hpp> - #include "common/common_types.h" #include "common/div_ceil.h" #include "common/literals.h" #include "common/lru_cache.h" #include "common/microprofile.h" +#include "common/range_sets.h" #include "common/scope_exit.h" #include "common/settings.h" +#include "common/slot_vector.h" #include "video_core/buffer_cache/buffer_base.h" #include "video_core/control/channel_state_cache.h" #include "video_core/delayed_destruction_ring.h" @@ -41,21 +31,15 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/surface.h" -#include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" -namespace boost { -template <typename T> -class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>; -} - namespace VideoCommon { MICROPROFILE_DECLARE(GPU_PrepareBuffers); MICROPROFILE_DECLARE(GPU_BindUploadBuffers); MICROPROFILE_DECLARE(GPU_DownloadMemory); -using BufferId = SlotId; +using BufferId = Common::SlotId; using VideoCore::Surface::PixelFormat; using namespace Common::Literals; @@ -184,7 +168,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; - static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS; static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; @@ -202,34 +185,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf using Async_Buffer = typename P::Async_Buffer; using MemoryTracker = typename P::MemoryTracker; - using IntervalCompare = std::less<DAddr>; - using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>; - using IntervalAllocator = boost::fast_pool_allocator<DAddr>; - using IntervalSet = boost::icl::interval_set<DAddr>; - using IntervalType = typename IntervalSet::interval_type; - - template <typename Type> - struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> { - // types - typedef counter_add_functor<Type> type; - typedef boost::icl::identity_based_inplace_combine<Type> base_type; - - // public member functions - void operator()(Type& current, const Type& added) const { - current += added; - if (current < base_type::identity_element()) { - current = base_type::identity_element(); - } - } - - // public static functions - static void version(Type&){}; - }; - - using OverlapCombine = counter_add_functor<int>; - using OverlapSection = boost::icl::inter_section<int>; - using OverlapCounter = boost::icl::split_interval_map<DAddr, int>; - struct OverlapResult { boost::container::small_vector<BufferId, 16> ids; DAddr begin; @@ -240,6 +195,8 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf public: explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_); + ~BufferCache(); + void TickFrame(); void WriteMemory(DAddr device_addr, u64 size); @@ -379,75 +336,6 @@ private: } } - template <typename Func> - void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) { - const DAddr start_address = device_addr; - const DAddr end_address = start_address + size; - const IntervalType search_interval{start_address, end_address}; - auto it = current_range.lower_bound(search_interval); - if (it == current_range.end()) { - return; - } - auto end_it = current_range.upper_bound(search_interval); - for (; it != end_it; it++) { - DAddr inter_addr_end = it->upper(); - DAddr inter_addr = it->lower(); - if (inter_addr_end > end_address) { - inter_addr_end = end_address; - } - if (inter_addr < start_address) { - inter_addr = start_address; - } - func(inter_addr, inter_addr_end); - } - } - - template <typename Func> - void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size, - Func&& func) { - const DAddr start_address = device_addr; - const DAddr end_address = start_address + size; - const IntervalType search_interval{start_address, end_address}; - auto it = current_range.lower_bound(search_interval); - if (it == current_range.end()) { - return; - } - auto end_it = current_range.upper_bound(search_interval); - for (; it != end_it; it++) { - auto& inter = it->first; - DAddr inter_addr_end = inter.upper(); - DAddr inter_addr = inter.lower(); - if (inter_addr_end > end_address) { - inter_addr_end = end_address; - } - if (inter_addr < start_address) { - inter_addr = start_address; - } - func(inter_addr, inter_addr_end, it->second); - } - } - - void RemoveEachInOverlapCounter(OverlapCounter& current_range, - const IntervalType search_interval, int subtract_value) { - bool any_removals = false; - current_range.add(std::make_pair(search_interval, subtract_value)); - do { - any_removals = false; - auto it = current_range.lower_bound(search_interval); - if (it == current_range.end()) { - return; - } - auto end_it = current_range.upper_bound(search_interval); - for (; it != end_it; it++) { - if (it->second <= 0) { - any_removals = true; - current_range.erase(it); - break; - } - } - } while (any_removals); - } - static bool IsRangeGranular(DAddr device_addr, size_t size) { return (device_addr & ~Core::DEVICE_PAGEMASK) == ((device_addr + size) & ~Core::DEVICE_PAGEMASK); @@ -552,14 +440,14 @@ private: [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; - void ClearDownload(IntervalType subtract_interval); + void ClearDownload(DAddr base_addr, u64 size); void InlineMemoryImplementation(DAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); Tegra::MaxwellDeviceMemoryManager& device_memory; - SlotVector<Buffer> slot_buffers; + Common::SlotVector<Buffer> slot_buffers; DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; @@ -567,13 +455,12 @@ private: u32 last_index_count = 0; MemoryTracker memory_tracker; - IntervalSet uncommitted_ranges; - IntervalSet common_ranges; - IntervalSet cached_ranges; - std::deque<IntervalSet> committed_ranges; + Common::RangeSet<DAddr> uncommitted_gpu_modified_ranges; + Common::RangeSet<DAddr> gpu_modified_ranges; + std::deque<Common::RangeSet<DAddr>> committed_gpu_modified_ranges; // Async Buffers - OverlapCounter async_downloads; + Common::OverlapRangeSet<DAddr> async_downloads; std::deque<std::optional<Async_Buffer>> async_buffers; std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads; std::optional<Async_Buffer> current_buffer; diff --git a/src/video_core/capture.h b/src/video_core/capture.h new file mode 100644 index 000000000..8db14a8ec --- /dev/null +++ b/src/video_core/capture.h @@ -0,0 +1,36 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/alignment.h" +#include "common/bit_util.h" +#include "common/common_types.h" +#include "core/frontend/framebuffer_layout.h" +#include "video_core/surface.h" + +namespace VideoCore::Capture { + +constexpr u32 BlockHeight = 4; +constexpr u32 BlockDepth = 0; +constexpr u32 BppLog2 = 2; + +constexpr auto PixelFormat = Surface::PixelFormat::B8G8R8A8_UNORM; + +constexpr auto LinearWidth = Layout::ScreenUndocked::Width; +constexpr auto LinearHeight = Layout::ScreenUndocked::Height; +constexpr auto LinearDepth = 1U; +constexpr auto BytesPerPixel = 4U; + +constexpr auto TiledWidth = LinearWidth; +constexpr auto TiledHeight = Common::AlignUpLog2(LinearHeight, BlockHeight + BlockDepth + BppLog2); +constexpr auto TiledSize = TiledWidth * TiledHeight * (1 << BppLog2); + +constexpr Layout::FramebufferLayout Layout{ + .width = LinearWidth, + .height = LinearHeight, + .screen = {0, 0, LinearWidth, LinearHeight}, + .is_srgb = false, +}; + +} // namespace VideoCore::Capture diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h index 6a18b76fb..8b2a49de5 100644 --- a/src/video_core/framebuffer_config.h +++ b/src/video_core/framebuffer_config.h @@ -11,6 +11,12 @@ namespace Tegra { +enum class BlendMode { + Opaque, + Premultiplied, + Coverage, +}; + /** * Struct describing framebuffer configuration */ @@ -23,6 +29,7 @@ struct FramebufferConfig { Service::android::PixelFormat pixel_format{}; Service::android::BufferTransformFlags transform_flags{}; Common::Rectangle<int> crop_rect{}; + BlendMode blending{}; }; Common::Rectangle<f32> NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width, diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index f4a5d831c..8e663f2a8 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -347,6 +347,17 @@ struct GPU::Impl { WaitForSyncOperation(wait_fence); } + std::vector<u8> GetAppletCaptureBuffer() { + std::vector<u8> out; + + const auto wait_fence = + RequestSyncOperation([&] { out = renderer->GetAppletCaptureBuffer(); }); + gpu_thread.TickGPU(); + WaitForSyncOperation(wait_fence); + + return out; + } + GPU& gpu; Core::System& system; Host1x::Host1x& host1x; @@ -505,6 +516,10 @@ void GPU::RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, impl->RequestComposite(std::move(layers), std::move(fences)); } +std::vector<u8> GPU::GetAppletCaptureBuffer() { + return impl->GetAppletCaptureBuffer(); +} + u64 GPU::GetTicks() const { return impl->GetTicks(); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index c4602ca37..ad535512c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -215,6 +215,8 @@ public: void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, std::vector<Service::Nvidia::NvFence>&& fences); + std::vector<u8> GetAppletCaptureBuffer(); + /// Performs any additional setup necessary in order to begin GPU emulation. /// This can be used to launch any necessary threads and register any necessary /// core timing events. diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp index c4c7a5883..e923bfa22 100644 --- a/src/video_core/host1x/host1x.cpp +++ b/src/video_core/host1x/host1x.cpp @@ -10,7 +10,7 @@ namespace Host1x { Host1x::Host1x(Core::System& system_) : system{system_}, syncpoint_manager{}, - memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12}, + memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 0, 12}, allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {} Host1x::~Host1x() = default; diff --git a/src/video_core/host_shaders/fidelityfx_fsr.frag b/src/video_core/host_shaders/fidelityfx_fsr.frag index a266e1c4e..54eedb450 100644 --- a/src/video_core/host_shaders/fidelityfx_fsr.frag +++ b/src/video_core/host_shaders/fidelityfx_fsr.frag @@ -37,6 +37,7 @@ layout(set=0,binding=0) uniform sampler2D InputTexture; #define A_GPU 1 #define A_GLSL 1 +#define FSR_RCAS_PASSTHROUGH_ALPHA 1 #ifndef YUZU_USE_FP16 #include "ffx_a.h" @@ -71,9 +72,7 @@ layout(set=0,binding=0) uniform sampler2D InputTexture; #include "ffx_fsr1.h" -#if USE_RCAS - layout(location = 0) in vec2 frag_texcoord; -#endif +layout (location = 0) in vec2 frag_texcoord; layout (location = 0) out vec4 frag_color; void CurrFilter(AU2 pos) { @@ -81,22 +80,22 @@ void CurrFilter(AU2 pos) { #ifndef YUZU_USE_FP16 AF3 c; FsrEasuF(c, pos, Const0, Const1, Const2, Const3); - frag_color = AF4(c, 1.0); + frag_color = AF4(c, texture(InputTexture, frag_texcoord).a); #else AH3 c; FsrEasuH(c, pos, Const0, Const1, Const2, Const3); - frag_color = AH4(c, 1.0); + frag_color = AH4(c, texture(InputTexture, frag_texcoord).a); #endif #endif #if USE_RCAS #ifndef YUZU_USE_FP16 - AF3 c; - FsrRcasF(c.r, c.g, c.b, pos, Const0); - frag_color = AF4(c, 1.0); + AF4 c; + FsrRcasF(c.r, c.g, c.b, c.a, pos, Const0); + frag_color = c; #else - AH3 c; - FsrRcasH(c.r, c.g, c.b, pos, Const0); - frag_color = AH4(c, 1.0); + AH4 c; + FsrRcasH(c.r, c.g, c.b, c.a, pos, Const0); + frag_color = c; #endif #endif } diff --git a/src/video_core/host_shaders/fxaa.frag b/src/video_core/host_shaders/fxaa.frag index 9bffc20d5..192a602c1 100644 --- a/src/video_core/host_shaders/fxaa.frag +++ b/src/video_core/host_shaders/fxaa.frag @@ -71,5 +71,5 @@ vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) { } void main() { - frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0); + frag_color = vec4(FxaaPixelShader(posPos, input_texture), texture(input_texture, posPos.xy).a); } diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag index 16d22f58e..fc47d3810 100644 --- a/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag +++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag @@ -31,6 +31,7 @@ layout (location = 0) uniform uvec4 constants[4]; #define A_GPU 1 #define A_GLSL 1 +#define FSR_RCAS_PASSTHROUGH_ALPHA 1 #ifdef YUZU_USE_FP16 #define A_HALF @@ -67,9 +68,7 @@ layout (location = 0) uniform uvec4 constants[4]; #include "ffx_fsr1.h" -#if USE_RCAS - layout(location = 0) in vec2 frag_texcoord; -#endif +layout (location = 0) in vec2 frag_texcoord; layout (location = 0) out vec4 frag_color; void CurrFilter(AU2 pos) @@ -78,22 +77,22 @@ void CurrFilter(AU2 pos) #ifndef YUZU_USE_FP16 AF3 c; FsrEasuF(c, pos, constants[0], constants[1], constants[2], constants[3]); - frag_color = AF4(c, 1.0); + frag_color = AF4(c, texture(InputTexture, frag_texcoord).a); #else AH3 c; FsrEasuH(c, pos, constants[0], constants[1], constants[2], constants[3]); - frag_color = AH4(c, 1.0); + frag_color = AH4(c, texture(InputTexture, frag_texcoord).a); #endif #endif #if USE_RCAS #ifndef YUZU_USE_FP16 - AF3 c; - FsrRcasF(c.r, c.g, c.b, pos, constants[0]); - frag_color = AF4(c, 1.0); + AF4 c; + FsrRcasF(c.r, c.g, c.b, c.a, pos, constants[0]); + frag_color = c; #else AH3 c; - FsrRcasH(c.r, c.g, c.b, pos, constants[0]); - frag_color = AH4(c, 1.0); + FsrRcasH(c.r, c.g, c.b, c.a, pos, constants[0]); + frag_color = c; #endif #endif } diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag index 5fd7ad297..096b4e4db 100644 --- a/src/video_core/host_shaders/opengl_present.frag +++ b/src/video_core/host_shaders/opengl_present.frag @@ -9,5 +9,5 @@ layout (location = 0) out vec4 color; layout (binding = 0) uniform sampler2D color_texture; void main() { - color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f); + color = vec4(texture(color_texture, frag_tex_coord)); } diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag index c814629cf..a9d9d40a3 100644 --- a/src/video_core/host_shaders/present_bicubic.frag +++ b/src/video_core/host_shaders/present_bicubic.frag @@ -52,5 +52,5 @@ vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { } void main() { - color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f); + color = textureBicubic(color_texture, frag_tex_coord); } diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag index ad9bb76a4..78edeb9b4 100644 --- a/src/video_core/host_shaders/present_gaussian.frag +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -46,14 +46,14 @@ vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) { } void main() { - vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0]; + vec4 base = texture(color_texture, vec2(frag_tex_coord)) * weight[0]; vec2 tex_offset = 1.0f / textureSize(color_texture, 0); // TODO(Blinkhawk): This code can be optimized through shader group instructions. - vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb; - vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb; - vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb; - vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb; - vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f); - color = vec4(combination + base, 1.0f); + vec4 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset); + vec4 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset); + vec4 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset); + vec4 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)); + vec4 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f); + color = combination + base; } diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag index d369bef06..05d033310 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag @@ -6,5 +6,6 @@ #define YUZU_USE_FP16 #define USE_EASU 1 +#define VERSION 1 #include "fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag index 6f25ef00f..7ae11dd66 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag @@ -5,5 +5,6 @@ #extension GL_GOOGLE_include_directive : enable #define USE_EASU 1 +#define VERSION 1 #include "fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag index 0c953a900..c017214a5 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag @@ -6,5 +6,6 @@ #define YUZU_USE_FP16 #define USE_RCAS 1 +#define VERSION 1 #include "fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag index 02e9a27c6..976825f4b 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag @@ -5,5 +5,6 @@ #extension GL_GOOGLE_include_directive : enable #define USE_RCAS 1 +#define VERSION 1 #include "fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_present.vert b/src/video_core/host_shaders/vulkan_present.vert index 249c9675a..c0e6e8537 100644 --- a/src/video_core/host_shaders/vulkan_present.vert +++ b/src/video_core/host_shaders/vulkan_present.vert @@ -19,15 +19,13 @@ layout (push_constant) uniform PushConstants { // Any member of a push constant block that is declared as an // array must only be accessed with dynamically uniform indices. ScreenRectVertex GetVertex(int index) { - switch (index) { - case 0: - default: + if (index < 1) { return vertices[0]; - case 1: + } else if (index < 2) { return vertices[1]; - case 2: + } else if (index < 3) { return vertices[2]; - case 3: + } else { return vertices[3]; } } diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag index 79ea817c2..cea5dac9d 100644 --- a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag @@ -5,7 +5,7 @@ #extension GL_GOOGLE_include_directive : enable -#define VERSION 1 +#define VERSION 2 #define YUZU_USE_FP16 #include "opengl_present_scaleforce.frag" diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag index 9605bb58b..10ddf0401 100644 --- a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag @@ -5,6 +5,6 @@ #extension GL_GOOGLE_include_directive : enable -#define VERSION 1 +#define VERSION 2 #include "opengl_present_scaleforce.frag" diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index a52f8e486..ffafc48ef 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -22,11 +22,12 @@ using Tegra::Memory::GuestMemoryFlags; std::atomic<size_t> MemoryManager::unique_identifier_generator{}; MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_, - u64 address_space_bits_, u64 big_page_bits_, u64 page_bits_) + u64 address_space_bits_, GPUVAddr split_address_, u64 big_page_bits_, + u64 page_bits_) : system{system_}, memory{memory_}, address_space_bits{address_space_bits_}, - page_bits{page_bits_}, big_page_bits{big_page_bits_}, entries{}, big_entries{}, - page_table{address_space_bits, address_space_bits + page_bits - 38, - page_bits != big_page_bits ? page_bits : 0}, + split_address{split_address_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, + entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, + page_bits != big_page_bits ? page_bits : 0}, kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( 1, std::memory_order_acq_rel)}, accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} { @@ -48,10 +49,10 @@ MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& entries.resize(page_table_size / 32, 0); } -MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, - u64 page_bits_) - : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, big_page_bits_, - page_bits_) {} +MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, + GPUVAddr split_address_, u64 big_page_bits_, u64 page_bits_) + : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, split_address_, + big_page_bits_, page_bits_) {} MemoryManager::~MemoryManager() = default; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index c5255f36c..ac7c1472a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -36,10 +36,11 @@ namespace Tegra { class MemoryManager final { public: explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40, - u64 big_page_bits_ = 16, u64 page_bits_ = 12); - explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_, - u64 address_space_bits_ = 40, u64 big_page_bits_ = 16, + GPUVAddr split_address = 1ULL << 34, u64 big_page_bits_ = 16, u64 page_bits_ = 12); + explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_, + u64 address_space_bits_ = 40, GPUVAddr split_address = 1ULL << 34, + u64 big_page_bits_ = 16, u64 page_bits_ = 12); ~MemoryManager(); size_t GetID() const { @@ -192,6 +193,7 @@ private: MaxwellDeviceMemoryManager& memory; const u64 address_space_bits; + GPUVAddr split_address; const u64 page_bits; u64 address_space_size; u64 page_size; diff --git a/src/video_core/present.h b/src/video_core/present.h new file mode 100644 index 000000000..4fdfcca68 --- /dev/null +++ b/src/video_core/present.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/settings.h" + +static inline Settings::ScalingFilter GetScalingFilter() { + return Settings::values.scaling_filter.GetValue(); +} + +static inline Settings::AntiAliasing GetAntiAliasing() { + return Settings::values.anti_aliasing.GetValue(); +} + +static inline Settings::ScalingFilter GetScalingFilterForAppletCapture() { + return Settings::ScalingFilter::Bilinear; +} + +static inline Settings::AntiAliasing GetAntiAliasingForAppletCapture() { + return Settings::AntiAliasing::None; +} + +struct PresentFilters { + Settings::ScalingFilter (*get_scaling_filter)(); + Settings::AntiAliasing (*get_anti_aliasing)(); +}; + +constexpr PresentFilters PresentFiltersForDisplay{ + .get_scaling_filter = &GetScalingFilter, + .get_anti_aliasing = &GetAntiAliasing, +}; + +constexpr PresentFilters PresentFiltersForAppletCapture{ + .get_scaling_filter = &GetScalingFilterForAppletCapture, + .get_anti_aliasing = &GetAntiAliasingForAppletCapture, +}; diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 4861b123a..e1019f228 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -18,12 +18,12 @@ #include "common/assert.h" #include "common/settings.h" +#include "common/slot_vector.h" #include "video_core/control/channel_state_cache.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" -#include "video_core/texture_cache/slot_vector.h" namespace VideoCore { enum class QueryType { @@ -37,7 +37,7 @@ constexpr std::size_t NumQueryTypes = static_cast<size_t>(QueryType::Count); namespace VideoCommon { -using AsyncJobId = SlotId; +using AsyncJobId = Common::SlotId; static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0}; @@ -341,7 +341,7 @@ private: static constexpr std::uintptr_t YUZU_PAGESIZE = 4096; static constexpr unsigned YUZU_PAGEBITS = 12; - SlotVector<AsyncJob> slot_async_jobs; + Common::SlotVector<AsyncJob> slot_async_jobs; VideoCore::RasterizerInterface& rasterizer; Tegra::MaxwellDeviceMemoryManager& device_memory; diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 3ad180f67..67427f937 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -40,6 +40,9 @@ public: /// Finalize rendering the guest frame and draw into the presentation texture virtual void Composite(std::span<const Tegra::FramebufferConfig> layers) = 0; + /// Get the tiled applet layer capture buffer + virtual std::vector<u8> GetAppletCaptureBuffer() = 0; + [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; [[nodiscard]] virtual std::string GetDeviceVendor() const = 0; diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp index c89daff53..e6147d66c 100644 --- a/src/video_core/renderer_null/renderer_null.cpp +++ b/src/video_core/renderer_null/renderer_null.cpp @@ -3,6 +3,7 @@ #include "core/frontend/emu_window.h" #include "core/frontend/graphics_context.h" +#include "video_core/capture.h" #include "video_core/renderer_null/renderer_null.h" namespace Null { @@ -22,4 +23,8 @@ void RendererNull::Composite(std::span<const Tegra::FramebufferConfig> framebuff render_window.OnFrameDisplayed(); } +std::vector<u8> RendererNull::GetAppletCaptureBuffer() { + return std::vector<u8>(VideoCore::Capture::TiledSize); +} + } // namespace Null diff --git a/src/video_core/renderer_null/renderer_null.h b/src/video_core/renderer_null/renderer_null.h index 063b476bb..34dbe1e4f 100644 --- a/src/video_core/renderer_null/renderer_null.h +++ b/src/video_core/renderer_null/renderer_null.h @@ -19,6 +19,8 @@ public: void Composite(std::span<const Tegra::FramebufferConfig> framebuffer) override; + std::vector<u8> GetAppletCaptureBuffer() override; + VideoCore::RasterizerInterface* ReadRasterizer() override { return &m_rasterizer; } diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp index 6ba8b214b..9260a4dc4 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.cpp +++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/settings.h" +#include "video_core/present.h" #include "video_core/renderer_opengl/gl_blit_screen.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/present/filters.h" @@ -13,14 +14,14 @@ namespace OpenGL { BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_, StateTracker& state_tracker_, ProgramManager& program_manager_, - Device& device_) + Device& device_, const PresentFilters& filters_) : rasterizer(rasterizer_), device_memory(device_memory_), state_tracker(state_tracker_), - program_manager(program_manager_), device(device_) {} + program_manager(program_manager_), device(device_), filters(filters_) {} BlitScreen::~BlitScreen() = default; void BlitScreen::DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffers, - const Layout::FramebufferLayout& layout) { + const Layout::FramebufferLayout& layout, bool invert_y) { // TODO: Signal state tracker about these changes state_tracker.NotifyScreenDrawVertexArray(); state_tracker.NotifyPolygonModes(); @@ -56,22 +57,22 @@ void BlitScreen::DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffe glDepthRangeIndexed(0, 0.0, 0.0); while (layers.size() < framebuffers.size()) { - layers.emplace_back(rasterizer, device_memory); + layers.emplace_back(rasterizer, device_memory, filters); } CreateWindowAdapt(); - window_adapt->DrawToFramebuffer(program_manager, layers, framebuffers, layout); + window_adapt->DrawToFramebuffer(program_manager, layers, framebuffers, layout, invert_y); // TODO // program_manager.RestoreGuestPipeline(); } void BlitScreen::CreateWindowAdapt() { - if (window_adapt && Settings::values.scaling_filter.GetValue() == current_window_adapt) { + if (window_adapt && filters.get_scaling_filter() == current_window_adapt) { return; } - current_window_adapt = Settings::values.scaling_filter.GetValue(); + current_window_adapt = filters.get_scaling_filter(); switch (current_window_adapt) { case Settings::ScalingFilter::NearestNeighbor: window_adapt = MakeNearestNeighbor(device); diff --git a/src/video_core/renderer_opengl/gl_blit_screen.h b/src/video_core/renderer_opengl/gl_blit_screen.h index 0c3d838f1..df2da9424 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.h +++ b/src/video_core/renderer_opengl/gl_blit_screen.h @@ -15,6 +15,8 @@ namespace Layout { struct FramebufferLayout; } +struct PresentFilters; + namespace Tegra { struct FramebufferConfig; } @@ -46,12 +48,12 @@ public: explicit BlitScreen(RasterizerOpenGL& rasterizer, Tegra::MaxwellDeviceMemoryManager& device_memory, StateTracker& state_tracker, ProgramManager& program_manager, - Device& device); + Device& device, const PresentFilters& filters); ~BlitScreen(); /// Draws the emulated screens to the emulator window. void DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffers, - const Layout::FramebufferLayout& layout); + const Layout::FramebufferLayout& layout, bool invert_y); private: void CreateWindowAdapt(); @@ -61,6 +63,7 @@ private: StateTracker& state_tracker; ProgramManager& program_manager; Device& device; + const PresentFilters& filters; Settings::ScalingFilter current_window_adapt{}; std::unique_ptr<WindowAdaptPass> window_adapt; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index af34c272b..fd471e979 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -90,7 +90,7 @@ public: void PostCopyBarrier(); void Finish(); - void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {} + void TickFrame(Common::SlotVector<Buffer>&) noexcept {} void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); @@ -251,7 +251,6 @@ struct BufferCacheParams { static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; - static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; // TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3e54edcc2..d4165d8e4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -30,13 +30,13 @@ class Image; class ImageView; class Sampler; +using Common::SlotVector; using VideoCommon::ImageId; using VideoCommon::ImageViewId; using VideoCommon::ImageViewType; using VideoCommon::NUM_RT; using VideoCommon::Region2D; using VideoCommon::RenderTargets; -using VideoCommon::SlotVector; struct FormatProperties { GLenum compatibility_class; diff --git a/src/video_core/renderer_opengl/present/layer.cpp b/src/video_core/renderer_opengl/present/layer.cpp index 8643e07c6..6c7092d22 100644 --- a/src/video_core/renderer_opengl/present/layer.cpp +++ b/src/video_core/renderer_opengl/present/layer.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "video_core/framebuffer_config.h" +#include "video_core/present.h" #include "video_core/renderer_opengl/gl_blit_screen.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/present/fsr.h" @@ -14,8 +15,9 @@ namespace OpenGL { -Layer::Layer(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_) - : rasterizer(rasterizer_), device_memory(device_memory_) { +Layer::Layer(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_, + const PresentFilters& filters_) + : rasterizer(rasterizer_), device_memory(device_memory_), filters(filters_) { // Allocate textures for the screen framebuffer_texture.resource.Create(GL_TEXTURE_2D); @@ -34,12 +36,12 @@ GLuint Layer::ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix, std::array<ScreenRectVertex, 4>& out_vertices, ProgramManager& program_manager, const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout& layout) { + const Layout::FramebufferLayout& layout, bool invert_y) { FramebufferTextureInfo info = PrepareRenderTarget(framebuffer); auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height); GLuint texture = info.display_texture; - auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); + auto anti_aliasing = filters.get_anti_aliasing(); if (anti_aliasing != Settings::AntiAliasing::None) { glEnablei(GL_SCISSOR_TEST, 0); auto viewport_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width); @@ -64,7 +66,7 @@ GLuint Layer::ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix, glDisablei(GL_SCISSOR_TEST, 0); - if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + if (filters.get_scaling_filter() == Settings::ScalingFilter::Fsr) { if (!fsr || fsr->NeedsRecreation(layout.screen)) { fsr = std::make_unique<FSR>(layout.screen.GetWidth(), layout.screen.GetHeight()); } @@ -83,10 +85,15 @@ GLuint Layer::ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix, const auto w = screen.GetWidth(); const auto h = screen.GetHeight(); - out_vertices[0] = ScreenRectVertex(x, y, crop.left, crop.top); - out_vertices[1] = ScreenRectVertex(x + w, y, crop.right, crop.top); - out_vertices[2] = ScreenRectVertex(x, y + h, crop.left, crop.bottom); - out_vertices[3] = ScreenRectVertex(x + w, y + h, crop.right, crop.bottom); + const auto left = crop.left; + const auto right = crop.right; + const auto top = invert_y ? crop.bottom : crop.top; + const auto bottom = invert_y ? crop.top : crop.bottom; + + out_vertices[0] = ScreenRectVertex(x, y, left, top); + out_vertices[1] = ScreenRectVertex(x + w, y, right, top); + out_vertices[2] = ScreenRectVertex(x, y + h, left, bottom); + out_vertices[3] = ScreenRectVertex(x + w, y + h, right, bottom); return texture; } @@ -131,10 +138,12 @@ FramebufferTextureInfo Layer::LoadFBToScreenInfo(const Tegra::FramebufferConfig& const u64 size_in_bytes{Tegra::Texture::CalculateSize( true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)}; - const std::span<const u8> input_data(host_ptr, size_in_bytes); - Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, - framebuffer.width, framebuffer.height, 1, block_height_log2, - 0); + if (host_ptr) { + const std::span<const u8> input_data(host_ptr, size_in_bytes); + Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, + framebuffer.width, framebuffer.height, 1, + block_height_log2, 0); + } glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); diff --git a/src/video_core/renderer_opengl/present/layer.h b/src/video_core/renderer_opengl/present/layer.h index ef1055abf..5b15b730f 100644 --- a/src/video_core/renderer_opengl/present/layer.h +++ b/src/video_core/renderer_opengl/present/layer.h @@ -13,6 +13,8 @@ namespace Layout { struct FramebufferLayout; } +struct PresentFilters; + namespace Service::android { enum class PixelFormat : u32; }; @@ -44,14 +46,15 @@ struct ScreenRectVertex; class Layer { public: - explicit Layer(RasterizerOpenGL& rasterizer, Tegra::MaxwellDeviceMemoryManager& device_memory); + explicit Layer(RasterizerOpenGL& rasterizer, Tegra::MaxwellDeviceMemoryManager& device_memory, + const PresentFilters& filters); ~Layer(); GLuint ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix, std::array<ScreenRectVertex, 4>& out_vertices, ProgramManager& program_manager, const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout& layout); + const Layout::FramebufferLayout& layout, bool invert_y); private: /// Loads framebuffer from emulated memory into the active OpenGL texture. @@ -65,6 +68,7 @@ private: private: RasterizerOpenGL& rasterizer; Tegra::MaxwellDeviceMemoryManager& device_memory; + const PresentFilters& filters; /// OpenGL framebuffer data std::vector<u8> gl_framebuffer_data; diff --git a/src/video_core/renderer_opengl/present/window_adapt_pass.cpp b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp index 4d681606b..d8b6a11cb 100644 --- a/src/video_core/renderer_opengl/present/window_adapt_pass.cpp +++ b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp @@ -37,7 +37,7 @@ WindowAdaptPass::~WindowAdaptPass() = default; void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::list<Layer>& layers, std::span<const Tegra::FramebufferConfig> framebuffers, - const Layout::FramebufferLayout& layout) { + const Layout::FramebufferLayout& layout, bool invert_y) { GLint old_read_fb; GLint old_draw_fb; glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); @@ -51,7 +51,7 @@ void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::li auto layer_it = layers.begin(); for (size_t i = 0; i < layer_count; i++) { textures[i] = layer_it->ConfigureDraw(matrices[i], vertices[i], program_manager, - framebuffers[i], layout); + framebuffers[i], layout, invert_y); layer_it++; } @@ -92,6 +92,21 @@ void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::li glClear(GL_COLOR_BUFFER_BIT); for (size_t i = 0; i < layer_count; i++) { + switch (framebuffers[i].blending) { + case Tegra::BlendMode::Opaque: + default: + glDisablei(GL_BLEND, 0); + break; + case Tegra::BlendMode::Premultiplied: + glEnablei(GL_BLEND, 0); + glBlendFuncSeparatei(0, GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO); + break; + case Tegra::BlendMode::Coverage: + glEnablei(GL_BLEND, 0); + glBlendFuncSeparatei(0, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO); + break; + } + glBindTextureUnit(0, textures[i]); glProgramUniformMatrix3x2fv(vert.handle, ModelViewMatrixLocation, 1, GL_FALSE, matrices[i].data()); diff --git a/src/video_core/renderer_opengl/present/window_adapt_pass.h b/src/video_core/renderer_opengl/present/window_adapt_pass.h index 00975a9c6..0a8bcef2f 100644 --- a/src/video_core/renderer_opengl/present/window_adapt_pass.h +++ b/src/video_core/renderer_opengl/present/window_adapt_pass.h @@ -31,7 +31,7 @@ public: void DrawToFramebuffer(ProgramManager& program_manager, std::list<Layer>& layers, std::span<const Tegra::FramebufferConfig> framebuffers, - const Layout::FramebufferLayout& layout); + const Layout::FramebufferLayout& layout, bool invert_y); private: const Device& device; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index e33a32592..5fb54635d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -16,6 +16,8 @@ #include "core/core_timing.h" #include "core/frontend/emu_window.h" #include "core/telemetry_session.h" +#include "video_core/capture.h" +#include "video_core/present.h" #include "video_core/renderer_opengl/gl_blit_screen.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" @@ -120,7 +122,15 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); } blit_screen = std::make_unique<BlitScreen>(rasterizer, device_memory, state_tracker, - program_manager, device); + program_manager, device, PresentFiltersForDisplay); + blit_applet = + std::make_unique<BlitScreen>(rasterizer, device_memory, state_tracker, program_manager, + device, PresentFiltersForAppletCapture); + capture_framebuffer.Create(); + capture_renderbuffer.Create(); + glBindRenderbuffer(GL_RENDERBUFFER, capture_renderbuffer.handle); + glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, VideoCore::Capture::LinearWidth, + VideoCore::Capture::LinearHeight); } RendererOpenGL::~RendererOpenGL() = default; @@ -130,10 +140,11 @@ void RendererOpenGL::Composite(std::span<const Tegra::FramebufferConfig> framebu return; } + RenderAppletCaptureLayer(framebuffers); RenderScreenshot(framebuffers); state_tracker.BindFramebuffer(0); - blit_screen->DrawScreen(framebuffers, emu_window.GetFramebufferLayout()); + blit_screen->DrawScreen(framebuffers, emu_window.GetFramebufferLayout(), false); ++m_current_frame; @@ -159,11 +170,8 @@ void RendererOpenGL::AddTelemetryFields() { telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); } -void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) { - if (!renderer_settings.screenshot_requested) { - return; - } - +void RendererOpenGL::RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers, + const Layout::FramebufferLayout& layout, void* dst) { GLint old_read_fb; GLint old_draw_fb; glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); @@ -173,29 +181,86 @@ void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig> screenshot_framebuffer.Create(); glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle); - const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; - GLuint renderbuffer; glGenRenderbuffers(1, &renderbuffer); glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, layout.width, layout.height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); - blit_screen->DrawScreen(framebuffers, layout); + blit_screen->DrawScreen(framebuffers, layout, false); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0); - glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, - renderer_settings.screenshot_bits); + glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, dst); screenshot_framebuffer.Release(); glDeleteRenderbuffers(1, &renderbuffer); glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); +} + +void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) { + if (!renderer_settings.screenshot_requested) { + return; + } + + RenderToBuffer(framebuffers, renderer_settings.screenshot_framebuffer_layout, + renderer_settings.screenshot_bits); renderer_settings.screenshot_complete_callback(true); renderer_settings.screenshot_requested = false; } +void RendererOpenGL::RenderAppletCaptureLayer( + std::span<const Tegra::FramebufferConfig> framebuffers) { + GLint old_read_fb; + GLint old_draw_fb; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + + glBindFramebuffer(GL_FRAMEBUFFER, capture_framebuffer.handle); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + capture_renderbuffer.handle); + + blit_applet->DrawScreen(framebuffers, VideoCore::Capture::Layout, true); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); +} + +std::vector<u8> RendererOpenGL::GetAppletCaptureBuffer() { + using namespace VideoCore::Capture; + + std::vector<u8> linear(TiledSize); + std::vector<u8> out(TiledSize); + + GLint old_read_fb; + GLint old_draw_fb; + GLint old_pixel_pack_buffer; + GLint old_pack_row_length; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + glGetIntegerv(GL_PIXEL_PACK_BUFFER_BINDING, &old_pixel_pack_buffer); + glGetIntegerv(GL_PACK_ROW_LENGTH, &old_pack_row_length); + + glBindFramebuffer(GL_FRAMEBUFFER, capture_framebuffer.handle); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + capture_renderbuffer.handle); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); + glReadPixels(0, 0, LinearWidth, LinearHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, + linear.data()); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); + glBindBuffer(GL_PIXEL_PACK_BUFFER, old_pixel_pack_buffer); + glPixelStorei(GL_PACK_ROW_LENGTH, old_pack_row_length); + + Tegra::Texture::SwizzleTexture(out, linear, BytesPerPixel, LinearWidth, LinearHeight, + LinearDepth, BlockHeight, BlockDepth); + + return out; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index c4625c96e..60d6a1477 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -42,6 +42,8 @@ public: void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override; + std::vector<u8> GetAppletCaptureBuffer() override; + VideoCore::RasterizerInterface* ReadRasterizer() override { return &rasterizer; } @@ -52,7 +54,11 @@ public: private: void AddTelemetryFields(); + + void RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers, + const Layout::FramebufferLayout& layout, void* dst); void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers); + void RenderAppletCaptureLayer(std::span<const Tegra::FramebufferConfig> framebuffers); Core::TelemetrySession& telemetry_session; Core::Frontend::EmuWindow& emu_window; @@ -64,8 +70,11 @@ private: ProgramManager program_manager; RasterizerOpenGL rasterizer; OGLFramebuffer screenshot_framebuffer; + OGLFramebuffer capture_framebuffer; + OGLRenderbuffer capture_renderbuffer; std::unique_ptr<BlitScreen> blit_screen; + std::unique_ptr<BlitScreen> blit_applet; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/present/layer.cpp b/src/video_core/renderer_vulkan/present/layer.cpp index cfc04be44..3847a9a13 100644 --- a/src/video_core/renderer_vulkan/present/layer.cpp +++ b/src/video_core/renderer_vulkan/present/layer.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "video_core/present.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "common/settings.h" @@ -48,12 +49,12 @@ VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { Layer::Layer(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_, Tegra::MaxwellDeviceMemoryManager& device_memory_, size_t image_count_, - VkExtent2D output_size, VkDescriptorSetLayout layout) + VkExtent2D output_size, VkDescriptorSetLayout layout, const PresentFilters& filters_) : device(device_), memory_allocator(memory_allocator_), scheduler(scheduler_), - device_memory(device_memory_), image_count(image_count_) { + device_memory(device_memory_), filters(filters_), image_count(image_count_) { CreateDescriptorPool(); CreateDescriptorSets(layout); - if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + if (filters.get_scaling_filter() == Settings::ScalingFilter::Fsr) { CreateFSR(output_size); } } @@ -171,11 +172,11 @@ void Layer::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { } void Layer::SetAntiAliasPass() { - if (anti_alias && anti_alias_setting == Settings::values.anti_aliasing.GetValue()) { + if (anti_alias && anti_alias_setting == filters.get_anti_aliasing()) { return; } - anti_alias_setting = Settings::values.anti_aliasing.GetValue(); + anti_alias_setting = filters.get_anti_aliasing(); const VkExtent2D render_area{ .width = Settings::values.resolution_info.ScaleUp(raw_width), @@ -270,9 +271,11 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i const u64 linear_size{GetSizeInBytes(framebuffer)}; const u64 tiled_size{Tegra::Texture::CalculateSize( true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; - Tegra::Texture::UnswizzleTexture( - mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size), - bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); + if (host_ptr) { + Tegra::Texture::UnswizzleTexture( + mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size), + bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); + } const VkBufferImageCopy copy{ .bufferOffset = image_offset, diff --git a/src/video_core/renderer_vulkan/present/layer.h b/src/video_core/renderer_vulkan/present/layer.h index 88d43fc5f..f5effdcd7 100644 --- a/src/video_core/renderer_vulkan/present/layer.h +++ b/src/video_core/renderer_vulkan/present/layer.h @@ -11,6 +11,8 @@ namespace Layout { struct FramebufferLayout; } +struct PresentFilters; + namespace Tegra { struct FramebufferConfig; } @@ -37,7 +39,8 @@ class Layer final { public: explicit Layer(const Device& device, MemoryAllocator& memory_allocator, Scheduler& scheduler, Tegra::MaxwellDeviceMemoryManager& device_memory, size_t image_count, - VkExtent2D output_size, VkDescriptorSetLayout layout); + VkExtent2D output_size, VkDescriptorSetLayout layout, + const PresentFilters& filters); ~Layer(); void ConfigureDraw(PresentPushConstants* out_push_constants, @@ -71,6 +74,7 @@ private: MemoryAllocator& memory_allocator; Scheduler& scheduler; Tegra::MaxwellDeviceMemoryManager& device_memory; + const PresentFilters& filters; const size_t image_count{}; vk::DescriptorPool descriptor_pool{}; vk::DescriptorSets descriptor_sets{}; diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index 6ee16595d..7f27c7c1b 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -362,10 +362,10 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device, }); } -vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass, - vk::PipelineLayout& layout, - std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders, - bool enable_blending) { +static vk::Pipeline CreateWrappedPipelineImpl( + const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout, + std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders, + VkPipelineColorBlendAttachmentState blending) { const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{ { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -443,30 +443,6 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp .alphaToOneEnable = VK_FALSE, }; - constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_disabled{ - .blendEnable = VK_FALSE, - .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .colorBlendOp = VK_BLEND_OP_ADD, - .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .alphaBlendOp = VK_BLEND_OP_ADD, - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, - }; - - constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_enabled{ - .blendEnable = VK_TRUE, - .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA, - .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, - .colorBlendOp = VK_BLEND_OP_ADD, - .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE, - .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .alphaBlendOp = VK_BLEND_OP_ADD, - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, - }; - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .pNext = nullptr, @@ -474,8 +450,7 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp .logicOpEnable = VK_FALSE, .logicOp = VK_LOGIC_OP_COPY, .attachmentCount = 1, - .pAttachments = - enable_blending ? &color_blend_attachment_enabled : &color_blend_attachment_disabled, + .pAttachments = &blending, .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, }; @@ -515,6 +490,63 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp }); } +vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass, + vk::PipelineLayout& layout, + std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) { + constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_disabled{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + + return CreateWrappedPipelineImpl(device, renderpass, layout, shaders, + color_blend_attachment_disabled); +} + +vk::Pipeline CreateWrappedPremultipliedBlendingPipeline( + const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout, + std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) { + constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_premultiplied{ + .blendEnable = VK_TRUE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ONE, + .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + + return CreateWrappedPipelineImpl(device, renderpass, layout, shaders, + color_blend_attachment_premultiplied); +} + +vk::Pipeline CreateWrappedCoverageBlendingPipeline( + const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout, + std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) { + constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_coverage{ + .blendEnable = VK_TRUE, + .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA, + .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + + return CreateWrappedPipelineImpl(device, renderpass, layout, shaders, + color_blend_attachment_coverage); +} + VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>& images, VkSampler sampler, VkImageView view, VkDescriptorSet set, u32 binding) { diff --git a/src/video_core/renderer_vulkan/present/util.h b/src/video_core/renderer_vulkan/present/util.h index 1104aaa15..5b22f0fa8 100644 --- a/src/video_core/renderer_vulkan/present/util.h +++ b/src/video_core/renderer_vulkan/present/util.h @@ -42,8 +42,13 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device, vk::DescriptorSetLayout& layout); vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout, - std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders, - bool enable_blending = false); + std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders); +vk::Pipeline CreateWrappedPremultipliedBlendingPipeline( + const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout, + std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders); +vk::Pipeline CreateWrappedCoverageBlendingPipeline( + const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout, + std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders); VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>& images, VkSampler sampler, VkImageView view, VkDescriptorSet set, u32 binding); diff --git a/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp index c5db0230d..22ffacf11 100644 --- a/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp +++ b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp @@ -22,7 +22,7 @@ WindowAdaptPass::WindowAdaptPass(const Device& device_, VkFormat frame_format, CreatePipelineLayout(); CreateVertexShader(); CreateRenderPass(frame_format); - CreatePipeline(); + CreatePipelines(); } WindowAdaptPass::~WindowAdaptPass() = default; @@ -34,7 +34,6 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s const VkFramebuffer host_framebuffer{*dst->framebuffer}; const VkRenderPass renderpass{*render_pass}; - const VkPipeline graphics_pipeline{*pipeline}; const VkPipelineLayout graphics_pipeline_layout{*pipeline_layout}; const VkExtent2D render_area{ .width = dst->width, @@ -44,9 +43,23 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s const size_t layer_count = configs.size(); std::vector<PresentPushConstants> push_constants(layer_count); std::vector<VkDescriptorSet> descriptor_sets(layer_count); + std::vector<VkPipeline> graphics_pipelines(layer_count); auto layer_it = layers.begin(); for (size_t i = 0; i < layer_count; i++) { + switch (configs[i].blending) { + case Tegra::BlendMode::Opaque: + default: + graphics_pipelines[i] = *opaque_pipeline; + break; + case Tegra::BlendMode::Premultiplied: + graphics_pipelines[i] = *premultiplied_pipeline; + break; + case Tegra::BlendMode::Coverage: + graphics_pipelines[i] = *coverage_pipeline; + break; + } + layer_it->ConfigureDraw(&push_constants[i], &descriptor_sets[i], rasterizer, *sampler, image_index, configs[i], layout); layer_it++; @@ -77,8 +90,8 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s BeginRenderPass(cmdbuf, renderpass, host_framebuffer, render_area); cmdbuf.ClearAttachments({clear_attachment}, {clear_rect}); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); for (size_t i = 0; i < layer_count; i++) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipelines[i]); cmdbuf.PushConstants(graphics_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants[i]); cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline_layout, 0, @@ -129,9 +142,13 @@ void WindowAdaptPass::CreateRenderPass(VkFormat frame_format) { render_pass = CreateWrappedRenderPass(device, frame_format, VK_IMAGE_LAYOUT_UNDEFINED); } -void WindowAdaptPass::CreatePipeline() { - pipeline = CreateWrappedPipeline(device, render_pass, pipeline_layout, - std::tie(vertex_shader, fragment_shader), false); +void WindowAdaptPass::CreatePipelines() { + opaque_pipeline = CreateWrappedPipeline(device, render_pass, pipeline_layout, + std::tie(vertex_shader, fragment_shader)); + premultiplied_pipeline = CreateWrappedPremultipliedBlendingPipeline( + device, render_pass, pipeline_layout, std::tie(vertex_shader, fragment_shader)); + coverage_pipeline = CreateWrappedCoverageBlendingPipeline( + device, render_pass, pipeline_layout, std::tie(vertex_shader, fragment_shader)); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/window_adapt_pass.h b/src/video_core/renderer_vulkan/present/window_adapt_pass.h index 0e2edfc31..cf667a4fc 100644 --- a/src/video_core/renderer_vulkan/present/window_adapt_pass.h +++ b/src/video_core/renderer_vulkan/present/window_adapt_pass.h @@ -42,7 +42,7 @@ private: void CreatePipelineLayout(); void CreateVertexShader(); void CreateRenderPass(VkFormat frame_format); - void CreatePipeline(); + void CreatePipelines(); private: const Device& device; @@ -52,7 +52,9 @@ private: vk::ShaderModule vertex_shader; vk::ShaderModule fragment_shader; vk::RenderPass render_pass; - vk::Pipeline pipeline; + vk::Pipeline opaque_pipeline; + vk::Pipeline premultiplied_pipeline; + vk::Pipeline coverage_pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 48a105327..d50417116 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -19,7 +19,9 @@ #include "core/core_timing.h" #include "core/frontend/graphics_context.h" #include "core/telemetry_session.h" +#include "video_core/capture.h" #include "video_core/gpu.h" +#include "video_core/present.h" #include "video_core/renderer_vulkan/present/util.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" @@ -38,6 +40,20 @@ namespace Vulkan { namespace { + +constexpr VkExtent2D CaptureImageSize{ + .width = VideoCore::Capture::LinearWidth, + .height = VideoCore::Capture::LinearHeight, +}; + +constexpr VkExtent3D CaptureImageExtent{ + .width = VideoCore::Capture::LinearWidth, + .height = VideoCore::Capture::LinearHeight, + .depth = VideoCore::Capture::LinearDepth, +}; + +constexpr VkFormat CaptureFormat = VK_FORMAT_A8B8G8R8_UNORM_PACK32; + std::string GetReadableVersion(u32 version) { return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), VK_VERSION_PATCH(version)); @@ -99,10 +115,15 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, render_window.GetFramebufferLayout().height), present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, surface), - blit_swapchain(device_memory, device, memory_allocator, present_manager, scheduler), - blit_screenshot(device_memory, device, memory_allocator, present_manager, scheduler), + blit_swapchain(device_memory, device, memory_allocator, present_manager, scheduler, + PresentFiltersForDisplay), + blit_capture(device_memory, device, memory_allocator, present_manager, scheduler, + PresentFiltersForDisplay), + blit_applet(device_memory, device, memory_allocator, present_manager, scheduler, + PresentFiltersForAppletCapture), rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker, - scheduler) { + scheduler), + applet_frame() { if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { turbo_mode.emplace(instance, dld); scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); }); @@ -125,6 +146,8 @@ void RendererVulkan::Composite(std::span<const Tegra::FramebufferConfig> framebu SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); + RenderAppletCaptureLayer(framebuffers); + if (!render_window.IsShown()) { return; } @@ -167,30 +190,20 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } -void Vulkan::RendererVulkan::RenderScreenshot( - std::span<const Tegra::FramebufferConfig> framebuffers) { - if (!renderer_settings.screenshot_requested) { - return; - } - - constexpr VkFormat ScreenshotFormat{VK_FORMAT_B8G8R8A8_UNORM}; - const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; - +vk::Buffer RendererVulkan::RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers, + const Layout::FramebufferLayout& layout, VkFormat format, + VkDeviceSize buffer_size) { auto frame = [&]() { Frame f{}; - f.image = CreateWrappedImage(memory_allocator, VkExtent2D{layout.width, layout.height}, - ScreenshotFormat); - f.image_view = CreateWrappedImageView(device, f.image, ScreenshotFormat); - f.framebuffer = blit_screenshot.CreateFramebuffer(layout, *f.image_view, ScreenshotFormat); + f.image = + CreateWrappedImage(memory_allocator, VkExtent2D{layout.width, layout.height}, format); + f.image_view = CreateWrappedImageView(device, f.image, format); + f.framebuffer = blit_capture.CreateFramebuffer(layout, *f.image_view, format); return f; }(); - blit_screenshot.DrawToFrame(rasterizer, &frame, framebuffers, layout, 1, - VK_FORMAT_B8G8R8A8_UNORM); - - const auto dst_buffer = CreateWrappedBuffer( - memory_allocator, static_cast<VkDeviceSize>(layout.width * layout.height * 4), - MemoryUsage::Download); + auto dst_buffer = CreateWrappedBuffer(memory_allocator, buffer_size, MemoryUsage::Download); + blit_capture.DrawToFrame(rasterizer, &frame, framebuffers, layout, 1, format); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([&](vk::CommandBuffer cmdbuf) { @@ -198,15 +211,68 @@ void Vulkan::RendererVulkan::RenderScreenshot( VkExtent3D{layout.width, layout.height, 1}); }); - // Ensure the copy is fully completed before saving the screenshot + // Ensure the copy is fully completed before saving the capture scheduler.Finish(); - // Copy backing image data to the QImage screenshot buffer + // Copy backing image data to the capture buffer dst_buffer.Invalidate(); + return dst_buffer; +} + +void RendererVulkan::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) { + if (!renderer_settings.screenshot_requested) { + return; + } + + const auto& layout{renderer_settings.screenshot_framebuffer_layout}; + const auto dst_buffer = RenderToBuffer(framebuffers, layout, VK_FORMAT_B8G8R8A8_UNORM, + layout.width * layout.height * 4); + std::memcpy(renderer_settings.screenshot_bits, dst_buffer.Mapped().data(), dst_buffer.Mapped().size()); renderer_settings.screenshot_complete_callback(false); renderer_settings.screenshot_requested = false; } +std::vector<u8> RendererVulkan::GetAppletCaptureBuffer() { + using namespace VideoCore::Capture; + + std::vector<u8> out(VideoCore::Capture::TiledSize); + + if (!applet_frame.image) { + return out; + } + + const auto dst_buffer = + CreateWrappedBuffer(memory_allocator, VideoCore::Capture::TiledSize, MemoryUsage::Download); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([&](vk::CommandBuffer cmdbuf) { + DownloadColorImage(cmdbuf, *applet_frame.image, *dst_buffer, CaptureImageExtent); + }); + + // Ensure the copy is fully completed before writing the capture + scheduler.Finish(); + + // Swizzle image data to the capture buffer + dst_buffer.Invalidate(); + Tegra::Texture::SwizzleTexture(out, dst_buffer.Mapped(), BytesPerPixel, LinearWidth, + LinearHeight, LinearDepth, BlockHeight, BlockDepth); + + return out; +} + +void RendererVulkan::RenderAppletCaptureLayer( + std::span<const Tegra::FramebufferConfig> framebuffers) { + if (!applet_frame.image) { + applet_frame.image = CreateWrappedImage(memory_allocator, CaptureImageSize, CaptureFormat); + applet_frame.image_view = CreateWrappedImageView(device, applet_frame.image, CaptureFormat); + applet_frame.framebuffer = blit_applet.CreateFramebuffer( + VideoCore::Capture::Layout, *applet_frame.image_view, CaptureFormat); + } + + blit_applet.DrawToFrame(rasterizer, &applet_frame, framebuffers, VideoCore::Capture::Layout, 1, + CaptureFormat); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index c6d8a0f21..fb9d83412 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -48,6 +48,8 @@ public: void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override; + std::vector<u8> GetAppletCaptureBuffer() override; + VideoCore::RasterizerInterface* ReadRasterizer() override { return &rasterizer; } @@ -59,7 +61,11 @@ public: private: void Report() const; + vk::Buffer RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers, + const Layout::FramebufferLayout& layout, VkFormat format, + VkDeviceSize buffer_size); void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers); + void RenderAppletCaptureLayer(std::span<const Tegra::FramebufferConfig> framebuffers); Core::TelemetrySession& telemetry_session; Tegra::MaxwellDeviceMemoryManager& device_memory; @@ -79,9 +85,12 @@ private: Swapchain swapchain; PresentManager present_manager; BlitScreen blit_swapchain; - BlitScreen blit_screenshot; + BlitScreen blit_capture; + BlitScreen blit_applet; RasterizerVulkan rasterizer; std::optional<TurboMode> turbo_mode; + + Frame applet_frame; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 2275fcc46..b7797f833 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "video_core/framebuffer_config.h" +#include "video_core/present.h" #include "video_core/renderer_vulkan/present/filters.h" #include "video_core/renderer_vulkan/present/layer.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" @@ -12,9 +13,9 @@ namespace Vulkan { BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_, MemoryAllocator& memory_allocator_, PresentManager& present_manager_, - Scheduler& scheduler_) + Scheduler& scheduler_, const PresentFilters& filters_) : device_memory{device_memory_}, device{device_}, memory_allocator{memory_allocator_}, - present_manager{present_manager_}, scheduler{scheduler_}, image_count{1}, + present_manager{present_manager_}, scheduler{scheduler_}, filters{filters_}, image_count{1}, swapchain_view_format{VK_FORMAT_B8G8R8A8_UNORM} {} BlitScreen::~BlitScreen() = default; @@ -27,7 +28,7 @@ void BlitScreen::WaitIdle() { void BlitScreen::SetWindowAdaptPass() { layers.clear(); - scaling_filter = Settings::values.scaling_filter.GetValue(); + scaling_filter = filters.get_scaling_filter(); switch (scaling_filter) { case Settings::ScalingFilter::NearestNeighbor: @@ -59,7 +60,7 @@ void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, bool presentation_recreate_required = false; // Recreate dynamic resources if the adapting filter changed - if (!window_adapt || scaling_filter != Settings::values.scaling_filter.GetValue()) { + if (!window_adapt || scaling_filter != filters.get_scaling_filter()) { resource_update_required = true; } @@ -102,7 +103,7 @@ void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, while (layers.size() < framebuffers.size()) { layers.emplace_back(device, memory_allocator, scheduler, device_memory, image_count, - window_size, window_adapt->GetDescriptorSetLayout()); + window_size, window_adapt->GetDescriptorSetLayout(), filters); } // Perform the draw @@ -119,8 +120,7 @@ vk::Framebuffer BlitScreen::CreateFramebuffer(const Layout::FramebufferLayout& l VkFormat current_view_format) { const bool format_updated = std::exchange(swapchain_view_format, current_view_format) != current_view_format; - if (!window_adapt || scaling_filter != Settings::values.scaling_filter.GetValue() || - format_updated) { + if (!window_adapt || scaling_filter != filters.get_scaling_filter() || format_updated) { WaitIdle(); SetWindowAdaptPass(); } diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index cbdf2d5d0..531c57fc5 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -16,6 +16,8 @@ namespace Core { class System; } +struct PresentFilters; + namespace Tegra { struct FramebufferConfig; } @@ -47,7 +49,7 @@ class BlitScreen { public: explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, const Device& device, MemoryAllocator& memory_allocator, PresentManager& present_manager, - Scheduler& scheduler); + Scheduler& scheduler, const PresentFilters& filters); ~BlitScreen(); void DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, @@ -70,6 +72,7 @@ private: MemoryAllocator& memory_allocator; PresentManager& present_manager; Scheduler& scheduler; + const PresentFilters& filters; std::size_t image_count{}; std::size_t image_index{}; VkFormat swapchain_view_format{}; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 31001d142..e5e1e3ab6 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -368,7 +368,7 @@ u32 BufferCacheRuntime::GetStorageBufferAlignment() const { return static_cast<u32>(device.GetStorageBufferAlignment()); } -void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept { +void BufferCacheRuntime::TickFrame(Common::SlotVector<Buffer>& slot_buffers) noexcept { for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) { it->ResetUsageTracking(); } diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index e273f4988..efe960258 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -81,7 +81,7 @@ public: ComputePassDescriptorQueue& compute_pass_descriptor_queue, DescriptorPool& descriptor_pool); - void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept; + void TickFrame(Common::SlotVector<Buffer>& slot_buffers) noexcept; void Finish(); @@ -181,7 +181,6 @@ struct BufferCacheParams { static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; - static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true; }; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 0dbde65d6..aaeb5ef93 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -20,11 +20,11 @@ struct ResolutionScalingInfo; namespace Vulkan { +using Common::SlotVector; using VideoCommon::ImageId; using VideoCommon::NUM_RT; using VideoCommon::Region2D; using VideoCommon::RenderTargets; -using VideoCommon::SlotVector; using VideoCore::Surface::PixelFormat; class BlitImageHelper; diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h deleted file mode 100644 index 3ffa2a661..000000000 --- a/src/video_core/texture_cache/slot_vector.h +++ /dev/null @@ -1,227 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include <algorithm> -#include <bit> -#include <numeric> -#include <type_traits> -#include <utility> -#include <vector> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/polyfill_ranges.h" - -namespace VideoCommon { - -struct SlotId { - static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max(); - - constexpr auto operator<=>(const SlotId&) const noexcept = default; - - constexpr explicit operator bool() const noexcept { - return index != INVALID_INDEX; - } - - u32 index = INVALID_INDEX; -}; - -template <class T> - requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T> -class SlotVector { -public: - class Iterator { - friend SlotVector<T>; - - public: - constexpr Iterator() = default; - - Iterator& operator++() noexcept { - const u64* const bitset = slot_vector->stored_bitset.data(); - const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64; - if (id.index < size) { - do { - ++id.index; - } while (id.index < size && !IsValid(bitset)); - if (id.index == size) { - id.index = SlotId::INVALID_INDEX; - } - } - return *this; - } - - Iterator operator++(int) noexcept { - const Iterator copy{*this}; - ++*this; - return copy; - } - - bool operator==(const Iterator& other) const noexcept { - return id.index == other.id.index; - } - - bool operator!=(const Iterator& other) const noexcept { - return id.index != other.id.index; - } - - std::pair<SlotId, T*> operator*() const noexcept { - return {id, std::addressof((*slot_vector)[id])}; - } - - T* operator->() const noexcept { - return std::addressof((*slot_vector)[id]); - } - - private: - Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept - : slot_vector{slot_vector_}, id{id_} {} - - bool IsValid(const u64* bitset) const noexcept { - return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; - } - - SlotVector<T>* slot_vector; - SlotId id; - }; - - ~SlotVector() noexcept { - size_t index = 0; - for (u64 bits : stored_bitset) { - for (size_t bit = 0; bits; ++bit, bits >>= 1) { - if ((bits & 1) != 0) { - values[index + bit].object.~T(); - } - } - index += 64; - } - delete[] values; - } - - [[nodiscard]] T& operator[](SlotId id) noexcept { - ValidateIndex(id); - return values[id.index].object; - } - - [[nodiscard]] const T& operator[](SlotId id) const noexcept { - ValidateIndex(id); - return values[id.index].object; - } - - template <typename... Args> - [[nodiscard]] SlotId insert(Args&&... args) noexcept { - const u32 index = FreeValueIndex(); - new (&values[index].object) T(std::forward<Args>(args)...); - SetStorageBit(index); - - return SlotId{index}; - } - - void erase(SlotId id) noexcept { - values[id.index].object.~T(); - free_list.push_back(id.index); - ResetStorageBit(id.index); - } - - [[nodiscard]] Iterator begin() noexcept { - const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; }); - if (it == stored_bitset.end()) { - return end(); - } - const u32 word_index = static_cast<u32>(std::distance(it, stored_bitset.begin())); - const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))}; - return Iterator(this, first_id); - } - - [[nodiscard]] Iterator end() noexcept { - return Iterator(this, SlotId{SlotId::INVALID_INDEX}); - } - - [[nodiscard]] size_t size() const noexcept { - return values_capacity - free_list.size(); - } - -private: - struct NonTrivialDummy { - NonTrivialDummy() noexcept {} - }; - - union Entry { - Entry() noexcept : dummy{} {} - ~Entry() noexcept {} - - NonTrivialDummy dummy; - T object; - }; - - void SetStorageBit(u32 index) noexcept { - stored_bitset[index / 64] |= u64(1) << (index % 64); - } - - void ResetStorageBit(u32 index) noexcept { - stored_bitset[index / 64] &= ~(u64(1) << (index % 64)); - } - - bool ReadStorageBit(u32 index) noexcept { - return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0; - } - - void ValidateIndex(SlotId id) const noexcept { - DEBUG_ASSERT(id); - DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); - DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); - } - - [[nodiscard]] u32 FreeValueIndex() noexcept { - if (free_list.empty()) { - Reserve(values_capacity ? (values_capacity << 1) : 1); - } - const u32 free_index = free_list.back(); - free_list.pop_back(); - return free_index; - } - - void Reserve(size_t new_capacity) noexcept { - Entry* const new_values = new Entry[new_capacity]; - size_t index = 0; - for (u64 bits : stored_bitset) { - for (size_t bit = 0; bits; ++bit, bits >>= 1) { - const size_t i = index + bit; - if ((bits & 1) == 0) { - continue; - } - T& old_value = values[i].object; - new (&new_values[i].object) T(std::move(old_value)); - old_value.~T(); - } - index += 64; - } - - stored_bitset.resize((new_capacity + 63) / 64); - - const size_t old_free_size = free_list.size(); - free_list.resize(old_free_size + (new_capacity - values_capacity)); - std::iota(free_list.begin() + old_free_size, free_list.end(), - static_cast<u32>(values_capacity)); - - delete[] values; - values = new_values; - values_capacity = new_capacity; - } - - Entry* values = nullptr; - size_t values_capacity = 0; - - std::vector<u64> stored_bitset; - std::vector<u32> free_list; -}; - -} // namespace VideoCommon - -template <> -struct std::hash<VideoCommon::SlotId> { - size_t operator()(const VideoCommon::SlotId& id) const noexcept { - return std::hash<u32>{}(id.index); - } -}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a20c956ff..3a1cc060e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -746,7 +746,13 @@ std::pair<typename P::ImageView*, bool> TextureCache<P>::TryFindFramebufferImage }(); const auto GetImageViewForFramebuffer = [&](ImageId image_id) { - const ImageViewInfo info{ImageViewType::e2D, view_format}; + ImageViewInfo info{ImageViewType::e2D, view_format}; + if (config.blending == Tegra::BlendMode::Opaque) { + info.x_source = static_cast<u8>(SwizzleSource::R); + info.y_source = static_cast<u8>(SwizzleSource::G); + info.z_source = static_cast<u8>(SwizzleSource::B); + info.w_source = static_cast<u8>(SwizzleSource::OneFloat); + } return std::make_pair(&slot_image_views[FindOrEmplaceImageView(image_id, info)], slot_images[image_id].IsRescaled()); }; diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e7b910121..da98a634b 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -21,6 +21,7 @@ #include "common/lru_cache.h" #include "common/polyfill_ranges.h" #include "common/scratch_buffer.h" +#include "common/slot_vector.h" #include "common/thread_worker.h" #include "video_core/compatible_formats.h" #include "video_core/control/channel_state_cache.h" @@ -32,7 +33,6 @@ #include "video_core/texture_cache/image_info.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/render_targets.h" -#include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" @@ -451,16 +451,16 @@ private: struct PendingDownload { bool is_swizzle; size_t async_buffer_id; - SlotId object_id; + Common::SlotId object_id; }; - SlotVector<Image> slot_images; - SlotVector<ImageMapView> slot_map_views; - SlotVector<ImageView> slot_image_views; - SlotVector<ImageAlloc> slot_image_allocs; - SlotVector<Sampler> slot_samplers; - SlotVector<Framebuffer> slot_framebuffers; - SlotVector<BufferDownload> slot_buffer_downloads; + Common::SlotVector<Image> slot_images; + Common::SlotVector<ImageMapView> slot_map_views; + Common::SlotVector<ImageView> slot_image_views; + Common::SlotVector<ImageAlloc> slot_image_allocs; + Common::SlotVector<Sampler> slot_samplers; + Common::SlotVector<Framebuffer> slot_framebuffers; + Common::SlotVector<BufferDownload> slot_buffer_downloads; // TODO: This data structure is not optimal and it should be reworked diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 0453456b4..07c304386 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -5,21 +5,21 @@ #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/texture_cache/slot_vector.h" +#include "common/slot_vector.h" namespace VideoCommon { constexpr size_t NUM_RT = 8; constexpr size_t MAX_MIP_LEVELS = 14; -constexpr SlotId CORRUPT_ID{0xfffffffe}; +constexpr Common::SlotId CORRUPT_ID{0xfffffffe}; -using ImageId = SlotId; -using ImageMapId = SlotId; -using ImageViewId = SlotId; -using ImageAllocId = SlotId; -using SamplerId = SlotId; -using FramebufferId = SlotId; +using ImageId = Common::SlotId; +using ImageMapId = Common::SlotId; +using ImageViewId = Common::SlotId; +using ImageAllocId = Common::SlotId; +using SamplerId = Common::SlotId; +using FramebufferId = Common::SlotId; /// Fake image ID for null image views constexpr ImageId NULL_IMAGE_ID{0}; |
