aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h250
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h135
-rw-r--r--src/video_core/capture.h36
-rw-r--r--src/video_core/framebuffer_config.h7
-rw-r--r--src/video_core/gpu.cpp15
-rw-r--r--src/video_core/gpu.h2
-rw-r--r--src/video_core/host1x/host1x.cpp2
-rw-r--r--src/video_core/host_shaders/fidelityfx_fsr.frag21
-rw-r--r--src/video_core/host_shaders/fxaa.frag2
-rw-r--r--src/video_core/host_shaders/opengl_fidelityfx_fsr.frag19
-rw-r--r--src/video_core/host_shaders/opengl_present.frag2
-rw-r--r--src/video_core/host_shaders/present_bicubic.frag2
-rw-r--r--src/video_core/host_shaders/present_gaussian.frag14
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag1
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag1
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag1
-rw-r--r--src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag1
-rw-r--r--src/video_core/host_shaders/vulkan_present.vert10
-rw-r--r--src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag2
-rw-r--r--src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag2
-rw-r--r--src/video_core/memory_manager.cpp17
-rw-r--r--src/video_core/memory_manager.h8
-rw-r--r--src/video_core/present.h37
-rw-r--r--src/video_core/query_cache.h6
-rw-r--r--src/video_core/renderer_base.h3
-rw-r--r--src/video_core/renderer_null/renderer_null.cpp5
-rw-r--r--src/video_core/renderer_null/renderer_null.h2
-rw-r--r--src/video_core/renderer_opengl/gl_blit_screen.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_blit_screen.h7
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h3
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h2
-rw-r--r--src/video_core/renderer_opengl/present/layer.cpp35
-rw-r--r--src/video_core/renderer_opengl/present/layer.h8
-rw-r--r--src/video_core/renderer_opengl/present/window_adapt_pass.cpp19
-rw-r--r--src/video_core/renderer_opengl/present/window_adapt_pass.h2
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp89
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h9
-rw-r--r--src/video_core/renderer_vulkan/present/layer.cpp19
-rw-r--r--src/video_core/renderer_vulkan/present/layer.h6
-rw-r--r--src/video_core/renderer_vulkan/present/util.cpp92
-rw-r--r--src/video_core/renderer_vulkan/present/util.h9
-rw-r--r--src/video_core/renderer_vulkan/present/window_adapt_pass.cpp29
-rw-r--r--src/video_core/renderer_vulkan/present/window_adapt_pass.h6
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp114
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp14
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h2
-rw-r--r--src/video_core/texture_cache/slot_vector.h227
-rw-r--r--src/video_core/texture_cache/texture_cache.h8
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h18
-rw-r--r--src/video_core/texture_cache/types.h16
55 files changed, 668 insertions, 708 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 16c905db9..2de2beb6e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -18,6 +18,7 @@ add_library(video_core STATIC
buffer_cache/usage_tracker.h
buffer_cache/word_manager.h
cache_types.h
+ capture.h
cdma_pusher.cpp
cdma_pusher.h
compatible_formats.cpp
@@ -101,6 +102,7 @@ add_library(video_core STATIC
memory_manager.cpp
memory_manager.h
precompiled_headers.h
+ present.h
pte_kind.h
query_cache/bank_base.h
query_cache/query_base.h
@@ -274,7 +276,6 @@ add_library(video_core STATIC
texture_cache/image_view_info.h
texture_cache/render_targets.h
texture_cache/samples_helper.h
- texture_cache/slot_vector.h
texture_cache/texture_cache.cpp
texture_cache/texture_cache.h
texture_cache/texture_cache_base.h
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b4bf369d1..6d3d933c5 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -7,6 +7,7 @@
#include <memory>
#include <numeric>
+#include "common/range_sets.inc"
#include "video_core/buffer_cache/buffer_cache_base.h"
#include "video_core/guest_memory.h"
#include "video_core/host1x/gpu_device_memory_manager.h"
@@ -20,7 +21,7 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R
: runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
- common_ranges.clear();
+ gpu_modified_ranges.Clear();
inline_buffer_id = NULL_BUFFER_ID;
if (!runtime.CanReportMemoryUsage()) {
@@ -44,6 +45,9 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R
}
template <class P>
+BufferCache<P>::~BufferCache() = default;
+
+template <class P>
void BufferCache<P>::RunGarbageCollector() {
const bool aggressive_gc = total_used_memory >= critical_memory;
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
@@ -96,20 +100,17 @@ void BufferCache<P>::TickFrame() {
++frame_tick;
delayed_destruction_ring.Tick();
- if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
- for (auto& buffer : async_buffers_death_ring) {
- runtime.FreeDeferredStagingBuffer(buffer);
- }
- async_buffers_death_ring.clear();
+ for (auto& buffer : async_buffers_death_ring) {
+ runtime.FreeDeferredStagingBuffer(buffer);
}
+ async_buffers_death_ring.clear();
}
template <class P>
void BufferCache<P>::WriteMemory(DAddr device_addr, u64 size) {
if (memory_tracker.IsRegionGpuModified(device_addr, size)) {
- const IntervalType subtract_interval{device_addr, device_addr + size};
- ClearDownload(subtract_interval);
- common_ranges.subtract(subtract_interval);
+ ClearDownload(device_addr, size);
+ gpu_modified_ranges.Subtract(device_addr, size);
}
memory_tracker.MarkRegionAsCpuModified(device_addr, size);
}
@@ -174,11 +175,11 @@ void BufferCache<P>::DownloadMemory(DAddr device_addr, u64 size) {
}
template <class P>
-void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
- RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024);
- uncommitted_ranges.subtract(subtract_interval);
- for (auto& interval_set : committed_ranges) {
- interval_set.subtract(subtract_interval);
+void BufferCache<P>::ClearDownload(DAddr device_addr, u64 size) {
+ async_downloads.DeleteAll(device_addr, size);
+ uncommitted_gpu_modified_ranges.Subtract(device_addr, size);
+ for (auto& interval_set : committed_gpu_modified_ranges) {
+ interval_set.Subtract(device_addr, size);
}
}
@@ -195,8 +196,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
return false;
}
- const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
- ClearDownload(subtract_interval);
+ ClearDownload(*cpu_dest_address, amount);
BufferId buffer_a;
BufferId buffer_b;
@@ -215,21 +215,20 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
.size = amount,
}};
- boost::container::small_vector<IntervalType, 4> tmp_intervals;
+ boost::container::small_vector<std::pair<DAddr, size_t>, 4> tmp_intervals;
auto mirror = [&](DAddr base_address, DAddr base_address_end) {
const u64 size = base_address_end - base_address;
const DAddr diff = base_address - *cpu_src_address;
const DAddr new_base_address = *cpu_dest_address + diff;
- const IntervalType add_interval{new_base_address, new_base_address + size};
- tmp_intervals.push_back(add_interval);
- uncommitted_ranges.add(add_interval);
+ tmp_intervals.push_back({new_base_address, size});
+ uncommitted_gpu_modified_ranges.Add(new_base_address, size);
};
- ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror);
+ gpu_modified_ranges.ForEachInRange(*cpu_src_address, amount, mirror);
// This subtraction in this order is important for overlapping copies.
- common_ranges.subtract(subtract_interval);
+ gpu_modified_ranges.Subtract(*cpu_dest_address, amount);
const bool has_new_downloads = tmp_intervals.size() != 0;
- for (const IntervalType& add_interval : tmp_intervals) {
- common_ranges.add(add_interval);
+ for (const auto& pair : tmp_intervals) {
+ gpu_modified_ranges.Add(pair.first, pair.second);
}
const auto& copy = copies[0];
src_buffer.MarkUsage(copy.src_offset, copy.size);
@@ -257,9 +256,8 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
}
const size_t size = amount * sizeof(u32);
- const IntervalType subtract_interval{*cpu_dst_address, *cpu_dst_address + size};
- ClearDownload(subtract_interval);
- common_ranges.subtract(subtract_interval);
+ ClearDownload(*cpu_dst_address, size);
+ gpu_modified_ranges.Subtract(*cpu_dst_address, size);
const BufferId buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
Buffer& dest_buffer = slot_buffers[buffer];
@@ -300,11 +298,11 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
MarkWrittenBuffer(buffer_id, device_addr, size);
break;
case ObtainBufferOperation::DiscardWrite: {
- DAddr device_addr_start = Common::AlignDown(device_addr, 64);
- DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
- IntervalType interval{device_addr_start, device_addr_end};
- ClearDownload(interval);
- common_ranges.subtract(interval);
+ const DAddr device_addr_start = Common::AlignDown(device_addr, 64);
+ const DAddr device_addr_end = Common::AlignUp(device_addr + size, 64);
+ const size_t new_size = device_addr_end - device_addr_start;
+ ClearDownload(device_addr_start, new_size);
+ gpu_modified_ranges.Subtract(device_addr_start, new_size);
break;
}
default:
@@ -504,46 +502,40 @@ void BufferCache<P>::FlushCachedWrites() {
template <class P>
bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
- return !uncommitted_ranges.empty() || !committed_ranges.empty();
+ return !uncommitted_gpu_modified_ranges.Empty() || !committed_gpu_modified_ranges.empty();
}
template <class P>
void BufferCache<P>::AccumulateFlushes() {
- if (uncommitted_ranges.empty()) {
+ if (uncommitted_gpu_modified_ranges.Empty()) {
return;
}
- committed_ranges.emplace_back(std::move(uncommitted_ranges));
+ committed_gpu_modified_ranges.emplace_back(std::move(uncommitted_gpu_modified_ranges));
}
template <class P>
bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
- if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
- return (!async_buffers.empty() && async_buffers.front().has_value());
- } else {
- return false;
- }
+ return (!async_buffers.empty() && async_buffers.front().has_value());
}
template <class P>
void BufferCache<P>::CommitAsyncFlushesHigh() {
AccumulateFlushes();
- if (committed_ranges.empty()) {
- if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
- async_buffers.emplace_back(std::optional<Async_Buffer>{});
- }
+ if (committed_gpu_modified_ranges.empty()) {
+ async_buffers.emplace_back(std::optional<Async_Buffer>{});
return;
}
MICROPROFILE_SCOPE(GPU_DownloadMemory);
- auto it = committed_ranges.begin();
- while (it != committed_ranges.end()) {
+ auto it = committed_gpu_modified_ranges.begin();
+ while (it != committed_gpu_modified_ranges.end()) {
auto& current_intervals = *it;
auto next_it = std::next(it);
- while (next_it != committed_ranges.end()) {
- for (auto& interval : *next_it) {
- current_intervals.subtract(interval);
- }
+ while (next_it != committed_gpu_modified_ranges.end()) {
+ next_it->ForEach([&current_intervals](DAddr start, DAddr end) {
+ current_intervals.Subtract(start, end - start);
+ });
next_it++;
}
it++;
@@ -552,10 +544,10 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
- for (const IntervalSet& intervals : committed_ranges) {
- for (auto& interval : intervals) {
- const std::size_t size = interval.upper() - interval.lower();
- const DAddr device_addr = interval.lower();
+ for (const Common::RangeSet<DAddr>& range_set : committed_gpu_modified_ranges) {
+ range_set.ForEach([&](DAddr interval_lower, DAddr interval_upper) {
+ const std::size_t size = interval_upper - interval_lower;
+ const DAddr device_addr = interval_lower;
ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
const DAddr buffer_start = buffer.CpuAddr();
const DAddr buffer_end = buffer_start + buffer.SizeBytes();
@@ -583,77 +575,35 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
largest_copy = std::max(largest_copy, new_size);
};
- ForEachInRangeSet(common_ranges, device_addr_out, range_size, add_download);
+ gpu_modified_ranges.ForEachInRange(device_addr_out, range_size,
+ add_download);
});
});
- }
+ });
}
- committed_ranges.clear();
+ committed_gpu_modified_ranges.clear();
if (downloads.empty()) {
- if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
- async_buffers.emplace_back(std::optional<Async_Buffer>{});
- }
+ async_buffers.emplace_back(std::optional<Async_Buffer>{});
return;
}
- if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
- auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
- boost::container::small_vector<BufferCopy, 4> normalized_copies;
- IntervalSet new_async_range{};
- runtime.PreCopyBarrier();
- for (auto& [copy, buffer_id] : downloads) {
- copy.dst_offset += download_staging.offset;
- const std::array copies{copy};
- BufferCopy second_copy{copy};
- Buffer& buffer = slot_buffers[buffer_id];
- second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
- DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
- const IntervalType base_interval{orig_device_addr, orig_device_addr + copy.size};
- async_downloads += std::make_pair(base_interval, 1);
- buffer.MarkUsage(copy.src_offset, copy.size);
- runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
- normalized_copies.push_back(second_copy);
- }
- runtime.PostCopyBarrier();
- pending_downloads.emplace_back(std::move(normalized_copies));
- async_buffers.emplace_back(download_staging);
- } else {
- if (!Settings::IsGPULevelHigh()) {
- committed_ranges.clear();
- uncommitted_ranges.clear();
- } else {
- if constexpr (USE_MEMORY_MAPS) {
- auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
- runtime.PreCopyBarrier();
- for (auto& [copy, buffer_id] : downloads) {
- // Have in mind the staging buffer offset for the copy
- copy.dst_offset += download_staging.offset;
- const std::array copies{copy};
- Buffer& buffer = slot_buffers[buffer_id];
- buffer.MarkUsage(copy.src_offset, copy.size);
- runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
- }
- runtime.PostCopyBarrier();
- runtime.Finish();
- for (const auto& [copy, buffer_id] : downloads) {
- const Buffer& buffer = slot_buffers[buffer_id];
- const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
- // Undo the modified offset
- const u64 dst_offset = copy.dst_offset - download_staging.offset;
- const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
- device_memory.WriteBlockUnsafe(device_addr, read_mapped_memory, copy.size);
- }
- } else {
- const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
- for (const auto& [copy, buffer_id] : downloads) {
- Buffer& buffer = slot_buffers[buffer_id];
- buffer.ImmediateDownload(copy.src_offset,
- immediate_buffer.subspan(0, copy.size));
- const DAddr device_addr = buffer.CpuAddr() + copy.src_offset;
- device_memory.WriteBlockUnsafe(device_addr, immediate_buffer.data(), copy.size);
- }
- }
- }
+ auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
+ boost::container::small_vector<BufferCopy, 4> normalized_copies;
+ runtime.PreCopyBarrier();
+ for (auto& [copy, buffer_id] : downloads) {
+ copy.dst_offset += download_staging.offset;
+ const std::array copies{copy};
+ BufferCopy second_copy{copy};
+ Buffer& buffer = slot_buffers[buffer_id];
+ second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
+ const DAddr orig_device_addr = static_cast<DAddr>(second_copy.src_offset);
+ async_downloads.Add(orig_device_addr, copy.size);
+ buffer.MarkUsage(copy.src_offset, copy.size);
+ runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
+ normalized_copies.push_back(second_copy);
}
+ runtime.PostCopyBarrier();
+ pending_downloads.emplace_back(std::move(normalized_copies));
+ async_buffers.emplace_back(download_staging);
}
template <class P>
@@ -676,37 +626,31 @@ void BufferCache<P>::PopAsyncBuffers() {
async_buffers.pop_front();
return;
}
- if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
- auto& downloads = pending_downloads.front();
- auto& async_buffer = async_buffers.front();
- u8* base = async_buffer->mapped_span.data();
- const size_t base_offset = async_buffer->offset;
- for (const auto& copy : downloads) {
- const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
- const u64 dst_offset = copy.dst_offset - base_offset;
- const u8* read_mapped_memory = base + dst_offset;
- ForEachInOverlapCounter(
- async_downloads, device_addr, copy.size, [&](DAddr start, DAddr end, int count) {
- device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
- end - start);
- if (count == 1) {
- const IntervalType base_interval{start, end};
- common_ranges.subtract(base_interval);
- }
- });
- const IntervalType subtract_interval{device_addr, device_addr + copy.size};
- RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
- }
- async_buffers_death_ring.emplace_back(*async_buffer);
- async_buffers.pop_front();
- pending_downloads.pop_front();
+ auto& downloads = pending_downloads.front();
+ auto& async_buffer = async_buffers.front();
+ u8* base = async_buffer->mapped_span.data();
+ const size_t base_offset = async_buffer->offset;
+ for (const auto& copy : downloads) {
+ const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
+ const u64 dst_offset = copy.dst_offset - base_offset;
+ const u8* read_mapped_memory = base + dst_offset;
+ async_downloads.ForEachInRange(device_addr, copy.size, [&](DAddr start, DAddr end, s32) {
+ device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
+ end - start);
+ });
+ async_downloads.Subtract(device_addr, copy.size, [&](DAddr start, DAddr end) {
+ gpu_modified_ranges.Subtract(start, end - start);
+ });
}
+ async_buffers_death_ring.emplace_back(*async_buffer);
+ async_buffers.pop_front();
+ pending_downloads.pop_front();
}
template <class P>
bool BufferCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
bool is_dirty = false;
- ForEachInRangeSet(common_ranges, addr, size, [&](DAddr, DAddr) { is_dirty = true; });
+ gpu_modified_ranges.ForEachInRange(addr, size, [&](DAddr, DAddr) { is_dirty = true; });
return is_dirty;
}
@@ -1320,10 +1264,8 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
template <class P>
void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, DAddr device_addr, u32 size) {
memory_tracker.MarkRegionAsGpuModified(device_addr, size);
-
- const IntervalType base_interval{device_addr, device_addr + size};
- common_ranges.add(base_interval);
- uncommitted_ranges.add(base_interval);
+ gpu_modified_ranges.Add(device_addr, size);
+ uncommitted_gpu_modified_ranges.Add(device_addr, size);
}
template <class P>
@@ -1600,9 +1542,8 @@ bool BufferCache<P>::InlineMemory(DAddr dest_address, size_t copy_size,
template <class P>
void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
std::span<const u8> inlined_buffer) {
- const IntervalType subtract_interval{dest_address, dest_address + copy_size};
- ClearDownload(subtract_interval);
- common_ranges.subtract(subtract_interval);
+ ClearDownload(dest_address, copy_size);
+ gpu_modified_ranges.Subtract(dest_address, copy_size);
BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size));
auto& buffer = slot_buffers[buffer_id];
@@ -1652,12 +1593,9 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
largest_copy = std::max(largest_copy, new_size);
};
- const DAddr start_address = device_addr_out;
- const DAddr end_address = start_address + range_size;
- ForEachInRangeSet(common_ranges, start_address, range_size, add_download);
- const IntervalType subtract_interval{start_address, end_address};
- ClearDownload(subtract_interval);
- common_ranges.subtract(subtract_interval);
+ gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download);
+ ClearDownload(device_addr_out, range_size);
+ gpu_modified_ranges.Subtract(device_addr_out, range_size);
});
if (total_size_bytes == 0) {
return;
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 80dbb81e7..240e9f015 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -13,25 +13,15 @@
#include <unordered_map>
#include <vector>
-#include <boost/container/small_vector.hpp>
-#define BOOST_NO_MT
-#include <boost/pool/detail/mutex.hpp>
-#undef BOOST_NO_MT
-#include <boost/icl/interval.hpp>
-#include <boost/icl/interval_base_set.hpp>
-#include <boost/icl/interval_set.hpp>
-#include <boost/icl/split_interval_map.hpp>
-#include <boost/pool/pool.hpp>
-#include <boost/pool/pool_alloc.hpp>
-#include <boost/pool/poolfwd.hpp>
-
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/literals.h"
#include "common/lru_cache.h"
#include "common/microprofile.h"
+#include "common/range_sets.h"
#include "common/scope_exit.h"
#include "common/settings.h"
+#include "common/slot_vector.h"
#include "video_core/buffer_cache/buffer_base.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/delayed_destruction_ring.h"
@@ -41,21 +31,15 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/surface.h"
-#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
-namespace boost {
-template <typename T>
-class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
-}
-
namespace VideoCommon {
MICROPROFILE_DECLARE(GPU_PrepareBuffers);
MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
MICROPROFILE_DECLARE(GPU_DownloadMemory);
-using BufferId = SlotId;
+using BufferId = Common::SlotId;
using VideoCore::Surface::PixelFormat;
using namespace Common::Literals;
@@ -184,7 +168,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
- static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
@@ -202,34 +185,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
using Async_Buffer = typename P::Async_Buffer;
using MemoryTracker = typename P::MemoryTracker;
- using IntervalCompare = std::less<DAddr>;
- using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
- using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
- using IntervalSet = boost::icl::interval_set<DAddr>;
- using IntervalType = typename IntervalSet::interval_type;
-
- template <typename Type>
- struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
- // types
- typedef counter_add_functor<Type> type;
- typedef boost::icl::identity_based_inplace_combine<Type> base_type;
-
- // public member functions
- void operator()(Type& current, const Type& added) const {
- current += added;
- if (current < base_type::identity_element()) {
- current = base_type::identity_element();
- }
- }
-
- // public static functions
- static void version(Type&){};
- };
-
- using OverlapCombine = counter_add_functor<int>;
- using OverlapSection = boost::icl::inter_section<int>;
- using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
-
struct OverlapResult {
boost::container::small_vector<BufferId, 16> ids;
DAddr begin;
@@ -240,6 +195,8 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
public:
explicit BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_);
+ ~BufferCache();
+
void TickFrame();
void WriteMemory(DAddr device_addr, u64 size);
@@ -379,75 +336,6 @@ private:
}
}
- template <typename Func>
- void ForEachInRangeSet(IntervalSet& current_range, DAddr device_addr, u64 size, Func&& func) {
- const DAddr start_address = device_addr;
- const DAddr end_address = start_address + size;
- const IntervalType search_interval{start_address, end_address};
- auto it = current_range.lower_bound(search_interval);
- if (it == current_range.end()) {
- return;
- }
- auto end_it = current_range.upper_bound(search_interval);
- for (; it != end_it; it++) {
- DAddr inter_addr_end = it->upper();
- DAddr inter_addr = it->lower();
- if (inter_addr_end > end_address) {
- inter_addr_end = end_address;
- }
- if (inter_addr < start_address) {
- inter_addr = start_address;
- }
- func(inter_addr, inter_addr_end);
- }
- }
-
- template <typename Func>
- void ForEachInOverlapCounter(OverlapCounter& current_range, DAddr device_addr, u64 size,
- Func&& func) {
- const DAddr start_address = device_addr;
- const DAddr end_address = start_address + size;
- const IntervalType search_interval{start_address, end_address};
- auto it = current_range.lower_bound(search_interval);
- if (it == current_range.end()) {
- return;
- }
- auto end_it = current_range.upper_bound(search_interval);
- for (; it != end_it; it++) {
- auto& inter = it->first;
- DAddr inter_addr_end = inter.upper();
- DAddr inter_addr = inter.lower();
- if (inter_addr_end > end_address) {
- inter_addr_end = end_address;
- }
- if (inter_addr < start_address) {
- inter_addr = start_address;
- }
- func(inter_addr, inter_addr_end, it->second);
- }
- }
-
- void RemoveEachInOverlapCounter(OverlapCounter& current_range,
- const IntervalType search_interval, int subtract_value) {
- bool any_removals = false;
- current_range.add(std::make_pair(search_interval, subtract_value));
- do {
- any_removals = false;
- auto it = current_range.lower_bound(search_interval);
- if (it == current_range.end()) {
- return;
- }
- auto end_it = current_range.upper_bound(search_interval);
- for (; it != end_it; it++) {
- if (it->second <= 0) {
- any_removals = true;
- current_range.erase(it);
- break;
- }
- }
- } while (any_removals);
- }
-
static bool IsRangeGranular(DAddr device_addr, size_t size) {
return (device_addr & ~Core::DEVICE_PAGEMASK) ==
((device_addr + size) & ~Core::DEVICE_PAGEMASK);
@@ -552,14 +440,14 @@ private:
[[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
- void ClearDownload(IntervalType subtract_interval);
+ void ClearDownload(DAddr base_addr, u64 size);
void InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
std::span<const u8> inlined_buffer);
Tegra::MaxwellDeviceMemoryManager& device_memory;
- SlotVector<Buffer> slot_buffers;
+ Common::SlotVector<Buffer> slot_buffers;
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
@@ -567,13 +455,12 @@ private:
u32 last_index_count = 0;
MemoryTracker memory_tracker;
- IntervalSet uncommitted_ranges;
- IntervalSet common_ranges;
- IntervalSet cached_ranges;
- std::deque<IntervalSet> committed_ranges;
+ Common::RangeSet<DAddr> uncommitted_gpu_modified_ranges;
+ Common::RangeSet<DAddr> gpu_modified_ranges;
+ std::deque<Common::RangeSet<DAddr>> committed_gpu_modified_ranges;
// Async Buffers
- OverlapCounter async_downloads;
+ Common::OverlapRangeSet<DAddr> async_downloads;
std::deque<std::optional<Async_Buffer>> async_buffers;
std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads;
std::optional<Async_Buffer> current_buffer;
diff --git a/src/video_core/capture.h b/src/video_core/capture.h
new file mode 100644
index 000000000..8db14a8ec
--- /dev/null
+++ b/src/video_core/capture.h
@@ -0,0 +1,36 @@
+// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/alignment.h"
+#include "common/bit_util.h"
+#include "common/common_types.h"
+#include "core/frontend/framebuffer_layout.h"
+#include "video_core/surface.h"
+
+namespace VideoCore::Capture {
+
+constexpr u32 BlockHeight = 4;
+constexpr u32 BlockDepth = 0;
+constexpr u32 BppLog2 = 2;
+
+constexpr auto PixelFormat = Surface::PixelFormat::B8G8R8A8_UNORM;
+
+constexpr auto LinearWidth = Layout::ScreenUndocked::Width;
+constexpr auto LinearHeight = Layout::ScreenUndocked::Height;
+constexpr auto LinearDepth = 1U;
+constexpr auto BytesPerPixel = 4U;
+
+constexpr auto TiledWidth = LinearWidth;
+constexpr auto TiledHeight = Common::AlignUpLog2(LinearHeight, BlockHeight + BlockDepth + BppLog2);
+constexpr auto TiledSize = TiledWidth * TiledHeight * (1 << BppLog2);
+
+constexpr Layout::FramebufferLayout Layout{
+ .width = LinearWidth,
+ .height = LinearHeight,
+ .screen = {0, 0, LinearWidth, LinearHeight},
+ .is_srgb = false,
+};
+
+} // namespace VideoCore::Capture
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h
index 6a18b76fb..8b2a49de5 100644
--- a/src/video_core/framebuffer_config.h
+++ b/src/video_core/framebuffer_config.h
@@ -11,6 +11,12 @@
namespace Tegra {
+enum class BlendMode {
+ Opaque,
+ Premultiplied,
+ Coverage,
+};
+
/**
* Struct describing framebuffer configuration
*/
@@ -23,6 +29,7 @@ struct FramebufferConfig {
Service::android::PixelFormat pixel_format{};
Service::android::BufferTransformFlags transform_flags{};
Common::Rectangle<int> crop_rect{};
+ BlendMode blending{};
};
Common::Rectangle<f32> NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width,
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index f4a5d831c..8e663f2a8 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -347,6 +347,17 @@ struct GPU::Impl {
WaitForSyncOperation(wait_fence);
}
+ std::vector<u8> GetAppletCaptureBuffer() {
+ std::vector<u8> out;
+
+ const auto wait_fence =
+ RequestSyncOperation([&] { out = renderer->GetAppletCaptureBuffer(); });
+ gpu_thread.TickGPU();
+ WaitForSyncOperation(wait_fence);
+
+ return out;
+ }
+
GPU& gpu;
Core::System& system;
Host1x::Host1x& host1x;
@@ -505,6 +516,10 @@ void GPU::RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
impl->RequestComposite(std::move(layers), std::move(fences));
}
+std::vector<u8> GPU::GetAppletCaptureBuffer() {
+ return impl->GetAppletCaptureBuffer();
+}
+
u64 GPU::GetTicks() const {
return impl->GetTicks();
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index c4602ca37..ad535512c 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -215,6 +215,8 @@ public:
void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
std::vector<Service::Nvidia::NvFence>&& fences);
+ std::vector<u8> GetAppletCaptureBuffer();
+
/// Performs any additional setup necessary in order to begin GPU emulation.
/// This can be used to launch any necessary threads and register any necessary
/// core timing events.
diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp
index c4c7a5883..e923bfa22 100644
--- a/src/video_core/host1x/host1x.cpp
+++ b/src/video_core/host1x/host1x.cpp
@@ -10,7 +10,7 @@ namespace Host1x {
Host1x::Host1x(Core::System& system_)
: system{system_}, syncpoint_manager{},
- memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
+ memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 0, 12},
allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}
Host1x::~Host1x() = default;
diff --git a/src/video_core/host_shaders/fidelityfx_fsr.frag b/src/video_core/host_shaders/fidelityfx_fsr.frag
index a266e1c4e..54eedb450 100644
--- a/src/video_core/host_shaders/fidelityfx_fsr.frag
+++ b/src/video_core/host_shaders/fidelityfx_fsr.frag
@@ -37,6 +37,7 @@ layout(set=0,binding=0) uniform sampler2D InputTexture;
#define A_GPU 1
#define A_GLSL 1
+#define FSR_RCAS_PASSTHROUGH_ALPHA 1
#ifndef YUZU_USE_FP16
#include "ffx_a.h"
@@ -71,9 +72,7 @@ layout(set=0,binding=0) uniform sampler2D InputTexture;
#include "ffx_fsr1.h"
-#if USE_RCAS
- layout(location = 0) in vec2 frag_texcoord;
-#endif
+layout (location = 0) in vec2 frag_texcoord;
layout (location = 0) out vec4 frag_color;
void CurrFilter(AU2 pos) {
@@ -81,22 +80,22 @@ void CurrFilter(AU2 pos) {
#ifndef YUZU_USE_FP16
AF3 c;
FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
- frag_color = AF4(c, 1.0);
+ frag_color = AF4(c, texture(InputTexture, frag_texcoord).a);
#else
AH3 c;
FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
- frag_color = AH4(c, 1.0);
+ frag_color = AH4(c, texture(InputTexture, frag_texcoord).a);
#endif
#endif
#if USE_RCAS
#ifndef YUZU_USE_FP16
- AF3 c;
- FsrRcasF(c.r, c.g, c.b, pos, Const0);
- frag_color = AF4(c, 1.0);
+ AF4 c;
+ FsrRcasF(c.r, c.g, c.b, c.a, pos, Const0);
+ frag_color = c;
#else
- AH3 c;
- FsrRcasH(c.r, c.g, c.b, pos, Const0);
- frag_color = AH4(c, 1.0);
+ AH4 c;
+ FsrRcasH(c.r, c.g, c.b, c.a, pos, Const0);
+ frag_color = c;
#endif
#endif
}
diff --git a/src/video_core/host_shaders/fxaa.frag b/src/video_core/host_shaders/fxaa.frag
index 9bffc20d5..192a602c1 100644
--- a/src/video_core/host_shaders/fxaa.frag
+++ b/src/video_core/host_shaders/fxaa.frag
@@ -71,5 +71,5 @@ vec3 FxaaPixelShader(vec4 posPos, sampler2D tex) {
}
void main() {
- frag_color = vec4(FxaaPixelShader(posPos, input_texture), 1.0);
+ frag_color = vec4(FxaaPixelShader(posPos, input_texture), texture(input_texture, posPos.xy).a);
}
diff --git a/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag
index 16d22f58e..fc47d3810 100644
--- a/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag
+++ b/src/video_core/host_shaders/opengl_fidelityfx_fsr.frag
@@ -31,6 +31,7 @@ layout (location = 0) uniform uvec4 constants[4];
#define A_GPU 1
#define A_GLSL 1
+#define FSR_RCAS_PASSTHROUGH_ALPHA 1
#ifdef YUZU_USE_FP16
#define A_HALF
@@ -67,9 +68,7 @@ layout (location = 0) uniform uvec4 constants[4];
#include "ffx_fsr1.h"
-#if USE_RCAS
- layout(location = 0) in vec2 frag_texcoord;
-#endif
+layout (location = 0) in vec2 frag_texcoord;
layout (location = 0) out vec4 frag_color;
void CurrFilter(AU2 pos)
@@ -78,22 +77,22 @@ void CurrFilter(AU2 pos)
#ifndef YUZU_USE_FP16
AF3 c;
FsrEasuF(c, pos, constants[0], constants[1], constants[2], constants[3]);
- frag_color = AF4(c, 1.0);
+ frag_color = AF4(c, texture(InputTexture, frag_texcoord).a);
#else
AH3 c;
FsrEasuH(c, pos, constants[0], constants[1], constants[2], constants[3]);
- frag_color = AH4(c, 1.0);
+ frag_color = AH4(c, texture(InputTexture, frag_texcoord).a);
#endif
#endif
#if USE_RCAS
#ifndef YUZU_USE_FP16
- AF3 c;
- FsrRcasF(c.r, c.g, c.b, pos, constants[0]);
- frag_color = AF4(c, 1.0);
+ AF4 c;
+ FsrRcasF(c.r, c.g, c.b, c.a, pos, constants[0]);
+ frag_color = c;
#else
- AH3 c;
- FsrRcasH(c.r, c.g, c.b, pos, constants[0]);
- frag_color = AH4(c, 1.0);
+ AH4 c;
+ FsrRcasH(c.r, c.g, c.b, c.a, pos, constants[0]);
+ frag_color = c;
#endif
#endif
}
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag
index 5fd7ad297..096b4e4db 100644
--- a/src/video_core/host_shaders/opengl_present.frag
+++ b/src/video_core/host_shaders/opengl_present.frag
@@ -9,5 +9,5 @@ layout (location = 0) out vec4 color;
layout (binding = 0) uniform sampler2D color_texture;
void main() {
- color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
+ color = vec4(texture(color_texture, frag_tex_coord));
}
diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag
index c814629cf..a9d9d40a3 100644
--- a/src/video_core/host_shaders/present_bicubic.frag
+++ b/src/video_core/host_shaders/present_bicubic.frag
@@ -52,5 +52,5 @@ vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) {
}
void main() {
- color = vec4(textureBicubic(color_texture, frag_tex_coord).rgb, 1.0f);
+ color = textureBicubic(color_texture, frag_tex_coord);
}
diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag
index ad9bb76a4..78edeb9b4 100644
--- a/src/video_core/host_shaders/present_gaussian.frag
+++ b/src/video_core/host_shaders/present_gaussian.frag
@@ -46,14 +46,14 @@ vec4 blurDiagonal(sampler2D textureSampler, vec2 coord, vec2 norm) {
}
void main() {
- vec3 base = texture(color_texture, vec2(frag_tex_coord)).rgb * weight[0];
+ vec4 base = texture(color_texture, vec2(frag_tex_coord)) * weight[0];
vec2 tex_offset = 1.0f / textureSize(color_texture, 0);
// TODO(Blinkhawk): This code can be optimized through shader group instructions.
- vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb;
- vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb;
- vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb;
- vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb;
- vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
- color = vec4(combination + base, 1.0f);
+ vec4 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset);
+ vec4 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset);
+ vec4 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset);
+ vec4 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0));
+ vec4 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
+ color = combination + base;
}
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag
index d369bef06..05d033310 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag
@@ -6,5 +6,6 @@
#define YUZU_USE_FP16
#define USE_EASU 1
+#define VERSION 1
#include "fidelityfx_fsr.frag"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag
index 6f25ef00f..7ae11dd66 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag
@@ -5,5 +5,6 @@
#extension GL_GOOGLE_include_directive : enable
#define USE_EASU 1
+#define VERSION 1
#include "fidelityfx_fsr.frag"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag
index 0c953a900..c017214a5 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag
@@ -6,5 +6,6 @@
#define YUZU_USE_FP16
#define USE_RCAS 1
+#define VERSION 1
#include "fidelityfx_fsr.frag"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag
index 02e9a27c6..976825f4b 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag
@@ -5,5 +5,6 @@
#extension GL_GOOGLE_include_directive : enable
#define USE_RCAS 1
+#define VERSION 1
#include "fidelityfx_fsr.frag"
diff --git a/src/video_core/host_shaders/vulkan_present.vert b/src/video_core/host_shaders/vulkan_present.vert
index 249c9675a..c0e6e8537 100644
--- a/src/video_core/host_shaders/vulkan_present.vert
+++ b/src/video_core/host_shaders/vulkan_present.vert
@@ -19,15 +19,13 @@ layout (push_constant) uniform PushConstants {
// Any member of a push constant block that is declared as an
// array must only be accessed with dynamically uniform indices.
ScreenRectVertex GetVertex(int index) {
- switch (index) {
- case 0:
- default:
+ if (index < 1) {
return vertices[0];
- case 1:
+ } else if (index < 2) {
return vertices[1];
- case 2:
+ } else if (index < 3) {
return vertices[2];
- case 3:
+ } else {
return vertices[3];
}
}
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
index 79ea817c2..cea5dac9d 100644
--- a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
@@ -5,7 +5,7 @@
#extension GL_GOOGLE_include_directive : enable
-#define VERSION 1
+#define VERSION 2
#define YUZU_USE_FP16
#include "opengl_present_scaleforce.frag"
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
index 9605bb58b..10ddf0401 100644
--- a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
@@ -5,6 +5,6 @@
#extension GL_GOOGLE_include_directive : enable
-#define VERSION 1
+#define VERSION 2
#include "opengl_present_scaleforce.frag"
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index a52f8e486..ffafc48ef 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -22,11 +22,12 @@ using Tegra::Memory::GuestMemoryFlags;
std::atomic<size_t> MemoryManager::unique_identifier_generator{};
MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
- u64 address_space_bits_, u64 big_page_bits_, u64 page_bits_)
+ u64 address_space_bits_, GPUVAddr split_address_, u64 big_page_bits_,
+ u64 page_bits_)
: system{system_}, memory{memory_}, address_space_bits{address_space_bits_},
- page_bits{page_bits_}, big_page_bits{big_page_bits_}, entries{}, big_entries{},
- page_table{address_space_bits, address_space_bits + page_bits - 38,
- page_bits != big_page_bits ? page_bits : 0},
+ split_address{split_address_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
+ entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
+ page_bits != big_page_bits ? page_bits : 0},
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
1, std::memory_order_acq_rel)},
accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
@@ -48,10 +49,10 @@ MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager&
entries.resize(page_table_size / 32, 0);
}
-MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
- u64 page_bits_)
- : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, big_page_bits_,
- page_bits_) {}
+MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_,
+ GPUVAddr split_address_, u64 big_page_bits_, u64 page_bits_)
+ : MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, split_address_,
+ big_page_bits_, page_bits_) {}
MemoryManager::~MemoryManager() = default;
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index c5255f36c..ac7c1472a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -36,10 +36,11 @@ namespace Tegra {
class MemoryManager final {
public:
explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
- u64 big_page_bits_ = 16, u64 page_bits_ = 12);
- explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
- u64 address_space_bits_ = 40, u64 big_page_bits_ = 16,
+ GPUVAddr split_address_ = 1ULL << 34, u64 big_page_bits_ = 16,
u64 page_bits_ = 12);
+ explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
+ u64 address_space_bits_ = 40, GPUVAddr split_address_ = 1ULL << 34,
+ u64 big_page_bits_ = 16, u64 page_bits_ = 12);
~MemoryManager();
size_t GetID() const {
@@ -192,6 +193,7 @@ private:
MaxwellDeviceMemoryManager& memory;
const u64 address_space_bits;
+ GPUVAddr split_address;
const u64 page_bits;
u64 address_space_size;
u64 page_size;
diff --git a/src/video_core/present.h b/src/video_core/present.h
new file mode 100644
index 000000000..4fdfcca68
--- /dev/null
+++ b/src/video_core/present.h
@@ -0,0 +1,45 @@
+// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/settings.h"
+
+/// Returns the scaling filter currently stored in the settings.
+inline Settings::ScalingFilter GetScalingFilter() {
+ return Settings::values.scaling_filter.GetValue();
+}
+
+/// Returns the anti-aliasing mode currently stored in the settings.
+inline Settings::AntiAliasing GetAntiAliasing() {
+ return Settings::values.anti_aliasing.GetValue();
+}
+
+/// Applet capture always scales with the bilinear filter, independent of the settings.
+inline Settings::ScalingFilter GetScalingFilterForAppletCapture() {
+ return Settings::ScalingFilter::Bilinear;
+}
+
+/// Applet capture never applies anti-aliasing, independent of the settings.
+inline Settings::AntiAliasing GetAntiAliasingForAppletCapture() {
+ return Settings::AntiAliasing::None;
+}
+
+/// Pair of callbacks queried for the scaling filter and anti-aliasing mode to present with.
+struct PresentFilters {
+ Settings::ScalingFilter (*get_scaling_filter)();
+ Settings::AntiAliasing (*get_anti_aliasing)();
+};
+
+/// Filters that follow the settings. Declared `inline` so that every translation unit
+/// including this header refers to the same object rather than a private copy.
+inline constexpr PresentFilters PresentFiltersForDisplay{
+ .get_scaling_filter = &GetScalingFilter,
+ .get_anti_aliasing = &GetAntiAliasing,
+};
+
+/// Fixed filters (bilinear scaling, no anti-aliasing) used for applet capture.
+inline constexpr PresentFilters PresentFiltersForAppletCapture{
+ .get_scaling_filter = &GetScalingFilterForAppletCapture,
+ .get_anti_aliasing = &GetAntiAliasingForAppletCapture,
+};
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 4861b123a..e1019f228 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -18,12 +18,12 @@
#include "common/assert.h"
#include "common/settings.h"
+#include "common/slot_vector.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
-#include "video_core/texture_cache/slot_vector.h"
namespace VideoCore {
enum class QueryType {
@@ -37,7 +37,7 @@ constexpr std::size_t NumQueryTypes = static_cast<size_t>(QueryType::Count);
namespace VideoCommon {
-using AsyncJobId = SlotId;
+using AsyncJobId = Common::SlotId;
static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0};
@@ -341,7 +341,7 @@ private:
static constexpr std::uintptr_t YUZU_PAGESIZE = 4096;
static constexpr unsigned YUZU_PAGEBITS = 12;
- SlotVector<AsyncJob> slot_async_jobs;
+ Common::SlotVector<AsyncJob> slot_async_jobs;
VideoCore::RasterizerInterface& rasterizer;
Tegra::MaxwellDeviceMemoryManager& device_memory;
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 3ad180f67..67427f937 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -40,6 +40,9 @@ public:
/// Finalize rendering the guest frame and draw into the presentation texture
virtual void Composite(std::span<const Tegra::FramebufferConfig> layers) = 0;
+ /// Get the tiled applet layer capture buffer
+ virtual std::vector<u8> GetAppletCaptureBuffer() = 0;
+
[[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0;
[[nodiscard]] virtual std::string GetDeviceVendor() const = 0;
diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp
index c89daff53..e6147d66c 100644
--- a/src/video_core/renderer_null/renderer_null.cpp
+++ b/src/video_core/renderer_null/renderer_null.cpp
@@ -3,6 +3,7 @@
#include "core/frontend/emu_window.h"
#include "core/frontend/graphics_context.h"
+#include "video_core/capture.h"
#include "video_core/renderer_null/renderer_null.h"
namespace Null {
@@ -22,4 +23,8 @@ void RendererNull::Composite(std::span<const Tegra::FramebufferConfig> framebuff
render_window.OnFrameDisplayed();
}
+std::vector<u8> RendererNull::GetAppletCaptureBuffer() {
+ return std::vector<u8>(VideoCore::Capture::TiledSize);
+}
+
} // namespace Null
diff --git a/src/video_core/renderer_null/renderer_null.h b/src/video_core/renderer_null/renderer_null.h
index 063b476bb..34dbe1e4f 100644
--- a/src/video_core/renderer_null/renderer_null.h
+++ b/src/video_core/renderer_null/renderer_null.h
@@ -19,6 +19,8 @@ public:
void Composite(std::span<const Tegra::FramebufferConfig> framebuffer) override;
+ std::vector<u8> GetAppletCaptureBuffer() override;
+
VideoCore::RasterizerInterface* ReadRasterizer() override {
return &m_rasterizer;
}
diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp
index 6ba8b214b..9260a4dc4 100644
--- a/src/video_core/renderer_opengl/gl_blit_screen.cpp
+++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/settings.h"
+#include "video_core/present.h"
#include "video_core/renderer_opengl/gl_blit_screen.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/present/filters.h"
@@ -13,14 +14,14 @@ namespace OpenGL {
BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_,
Tegra::MaxwellDeviceMemoryManager& device_memory_,
StateTracker& state_tracker_, ProgramManager& program_manager_,
- Device& device_)
+ Device& device_, const PresentFilters& filters_)
: rasterizer(rasterizer_), device_memory(device_memory_), state_tracker(state_tracker_),
- program_manager(program_manager_), device(device_) {}
+ program_manager(program_manager_), device(device_), filters(filters_) {}
BlitScreen::~BlitScreen() = default;
void BlitScreen::DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffers,
- const Layout::FramebufferLayout& layout) {
+ const Layout::FramebufferLayout& layout, bool invert_y) {
// TODO: Signal state tracker about these changes
state_tracker.NotifyScreenDrawVertexArray();
state_tracker.NotifyPolygonModes();
@@ -56,22 +57,22 @@ void BlitScreen::DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffe
glDepthRangeIndexed(0, 0.0, 0.0);
while (layers.size() < framebuffers.size()) {
- layers.emplace_back(rasterizer, device_memory);
+ layers.emplace_back(rasterizer, device_memory, filters);
}
CreateWindowAdapt();
- window_adapt->DrawToFramebuffer(program_manager, layers, framebuffers, layout);
+ window_adapt->DrawToFramebuffer(program_manager, layers, framebuffers, layout, invert_y);
// TODO
// program_manager.RestoreGuestPipeline();
}
void BlitScreen::CreateWindowAdapt() {
- if (window_adapt && Settings::values.scaling_filter.GetValue() == current_window_adapt) {
+ if (window_adapt && filters.get_scaling_filter() == current_window_adapt) {
return;
}
- current_window_adapt = Settings::values.scaling_filter.GetValue();
+ current_window_adapt = filters.get_scaling_filter();
switch (current_window_adapt) {
case Settings::ScalingFilter::NearestNeighbor:
window_adapt = MakeNearestNeighbor(device);
diff --git a/src/video_core/renderer_opengl/gl_blit_screen.h b/src/video_core/renderer_opengl/gl_blit_screen.h
index 0c3d838f1..df2da9424 100644
--- a/src/video_core/renderer_opengl/gl_blit_screen.h
+++ b/src/video_core/renderer_opengl/gl_blit_screen.h
@@ -15,6 +15,8 @@ namespace Layout {
struct FramebufferLayout;
}
+struct PresentFilters;
+
namespace Tegra {
struct FramebufferConfig;
}
@@ -46,12 +48,12 @@ public:
explicit BlitScreen(RasterizerOpenGL& rasterizer,
Tegra::MaxwellDeviceMemoryManager& device_memory,
StateTracker& state_tracker, ProgramManager& program_manager,
- Device& device);
+ Device& device, const PresentFilters& filters);
~BlitScreen();
/// Draws the emulated screens to the emulator window.
void DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffers,
- const Layout::FramebufferLayout& layout);
+ const Layout::FramebufferLayout& layout, bool invert_y);
private:
void CreateWindowAdapt();
@@ -61,6 +63,7 @@ private:
StateTracker& state_tracker;
ProgramManager& program_manager;
Device& device;
+ const PresentFilters& filters;
Settings::ScalingFilter current_window_adapt{};
std::unique_ptr<WindowAdaptPass> window_adapt;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index af34c272b..fd471e979 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -90,7 +90,7 @@ public:
void PostCopyBarrier();
void Finish();
- void TickFrame(VideoCommon::SlotVector<Buffer>&) noexcept {}
+ void TickFrame(Common::SlotVector<Buffer>&) noexcept {}
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
@@ -251,7 +251,6 @@ struct BufferCacheParams {
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
- static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
// TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 3e54edcc2..d4165d8e4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -30,13 +30,13 @@ class Image;
class ImageView;
class Sampler;
+using Common::SlotVector;
using VideoCommon::ImageId;
using VideoCommon::ImageViewId;
using VideoCommon::ImageViewType;
using VideoCommon::NUM_RT;
using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
-using VideoCommon::SlotVector;
struct FormatProperties {
GLenum compatibility_class;
diff --git a/src/video_core/renderer_opengl/present/layer.cpp b/src/video_core/renderer_opengl/present/layer.cpp
index 8643e07c6..6c7092d22 100644
--- a/src/video_core/renderer_opengl/present/layer.cpp
+++ b/src/video_core/renderer_opengl/present/layer.cpp
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/framebuffer_config.h"
+#include "video_core/present.h"
#include "video_core/renderer_opengl/gl_blit_screen.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/present/fsr.h"
@@ -14,8 +15,9 @@
namespace OpenGL {
-Layer::Layer(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_)
- : rasterizer(rasterizer_), device_memory(device_memory_) {
+Layer::Layer(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_,
+ const PresentFilters& filters_)
+ : rasterizer(rasterizer_), device_memory(device_memory_), filters(filters_) {
// Allocate textures for the screen
framebuffer_texture.resource.Create(GL_TEXTURE_2D);
@@ -34,12 +36,12 @@ GLuint Layer::ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix,
std::array<ScreenRectVertex, 4>& out_vertices,
ProgramManager& program_manager,
const Tegra::FramebufferConfig& framebuffer,
- const Layout::FramebufferLayout& layout) {
+ const Layout::FramebufferLayout& layout, bool invert_y) {
FramebufferTextureInfo info = PrepareRenderTarget(framebuffer);
auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height);
GLuint texture = info.display_texture;
- auto anti_aliasing = Settings::values.anti_aliasing.GetValue();
+ auto anti_aliasing = filters.get_anti_aliasing();
if (anti_aliasing != Settings::AntiAliasing::None) {
glEnablei(GL_SCISSOR_TEST, 0);
auto viewport_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width);
@@ -64,7 +66,7 @@ GLuint Layer::ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix,
glDisablei(GL_SCISSOR_TEST, 0);
- if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
+ if (filters.get_scaling_filter() == Settings::ScalingFilter::Fsr) {
if (!fsr || fsr->NeedsRecreation(layout.screen)) {
fsr = std::make_unique<FSR>(layout.screen.GetWidth(), layout.screen.GetHeight());
}
@@ -83,10 +85,15 @@ GLuint Layer::ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix,
const auto w = screen.GetWidth();
const auto h = screen.GetHeight();
- out_vertices[0] = ScreenRectVertex(x, y, crop.left, crop.top);
- out_vertices[1] = ScreenRectVertex(x + w, y, crop.right, crop.top);
- out_vertices[2] = ScreenRectVertex(x, y + h, crop.left, crop.bottom);
- out_vertices[3] = ScreenRectVertex(x + w, y + h, crop.right, crop.bottom);
+ const auto left = crop.left;
+ const auto right = crop.right;
+ const auto top = invert_y ? crop.bottom : crop.top;
+ const auto bottom = invert_y ? crop.top : crop.bottom;
+
+ out_vertices[0] = ScreenRectVertex(x, y, left, top);
+ out_vertices[1] = ScreenRectVertex(x + w, y, right, top);
+ out_vertices[2] = ScreenRectVertex(x, y + h, left, bottom);
+ out_vertices[3] = ScreenRectVertex(x + w, y + h, right, bottom);
return texture;
}
@@ -131,10 +138,12 @@ FramebufferTextureInfo Layer::LoadFBToScreenInfo(const Tegra::FramebufferConfig&
const u64 size_in_bytes{Tegra::Texture::CalculateSize(
true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)};
- const std::span<const u8> input_data(host_ptr, size_in_bytes);
- Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
- framebuffer.width, framebuffer.height, 1, block_height_log2,
- 0);
+ if (host_ptr) {
+ const std::span<const u8> input_data(host_ptr, size_in_bytes);
+ Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
+ framebuffer.width, framebuffer.height, 1,
+ block_height_log2, 0);
+ }
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
diff --git a/src/video_core/renderer_opengl/present/layer.h b/src/video_core/renderer_opengl/present/layer.h
index ef1055abf..5b15b730f 100644
--- a/src/video_core/renderer_opengl/present/layer.h
+++ b/src/video_core/renderer_opengl/present/layer.h
@@ -13,6 +13,8 @@ namespace Layout {
struct FramebufferLayout;
}
+struct PresentFilters;
+
namespace Service::android {
enum class PixelFormat : u32;
};
@@ -44,14 +46,15 @@ struct ScreenRectVertex;
class Layer {
public:
- explicit Layer(RasterizerOpenGL& rasterizer, Tegra::MaxwellDeviceMemoryManager& device_memory);
+ explicit Layer(RasterizerOpenGL& rasterizer, Tegra::MaxwellDeviceMemoryManager& device_memory,
+ const PresentFilters& filters);
~Layer();
GLuint ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix,
std::array<ScreenRectVertex, 4>& out_vertices,
ProgramManager& program_manager,
const Tegra::FramebufferConfig& framebuffer,
- const Layout::FramebufferLayout& layout);
+ const Layout::FramebufferLayout& layout, bool invert_y);
private:
/// Loads framebuffer from emulated memory into the active OpenGL texture.
@@ -65,6 +68,7 @@ private:
private:
RasterizerOpenGL& rasterizer;
Tegra::MaxwellDeviceMemoryManager& device_memory;
+ const PresentFilters& filters;
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
diff --git a/src/video_core/renderer_opengl/present/window_adapt_pass.cpp b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp
index 4d681606b..d8b6a11cb 100644
--- a/src/video_core/renderer_opengl/present/window_adapt_pass.cpp
+++ b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp
@@ -37,7 +37,7 @@ WindowAdaptPass::~WindowAdaptPass() = default;
void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::list<Layer>& layers,
std::span<const Tegra::FramebufferConfig> framebuffers,
- const Layout::FramebufferLayout& layout) {
+ const Layout::FramebufferLayout& layout, bool invert_y) {
GLint old_read_fb;
GLint old_draw_fb;
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
@@ -51,7 +51,7 @@ void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::li
auto layer_it = layers.begin();
for (size_t i = 0; i < layer_count; i++) {
textures[i] = layer_it->ConfigureDraw(matrices[i], vertices[i], program_manager,
- framebuffers[i], layout);
+ framebuffers[i], layout, invert_y);
layer_it++;
}
@@ -92,6 +92,21 @@ void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::li
glClear(GL_COLOR_BUFFER_BIT);
for (size_t i = 0; i < layer_count; i++) {
+ switch (framebuffers[i].blending) {
+ case Tegra::BlendMode::Opaque:
+ default:
+ glDisablei(GL_BLEND, 0);
+ break;
+ case Tegra::BlendMode::Premultiplied:
+ glEnablei(GL_BLEND, 0);
+ glBlendFuncSeparatei(0, GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO);
+ break;
+ case Tegra::BlendMode::Coverage:
+ glEnablei(GL_BLEND, 0);
+ glBlendFuncSeparatei(0, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO);
+ break;
+ }
+
glBindTextureUnit(0, textures[i]);
glProgramUniformMatrix3x2fv(vert.handle, ModelViewMatrixLocation, 1, GL_FALSE,
matrices[i].data());
diff --git a/src/video_core/renderer_opengl/present/window_adapt_pass.h b/src/video_core/renderer_opengl/present/window_adapt_pass.h
index 00975a9c6..0a8bcef2f 100644
--- a/src/video_core/renderer_opengl/present/window_adapt_pass.h
+++ b/src/video_core/renderer_opengl/present/window_adapt_pass.h
@@ -31,7 +31,7 @@ public:
void DrawToFramebuffer(ProgramManager& program_manager, std::list<Layer>& layers,
std::span<const Tegra::FramebufferConfig> framebuffers,
- const Layout::FramebufferLayout& layout);
+ const Layout::FramebufferLayout& layout, bool invert_y);
private:
const Device& device;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e33a32592..5fb54635d 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -16,6 +16,8 @@
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/telemetry_session.h"
+#include "video_core/capture.h"
+#include "video_core/present.h"
#include "video_core/renderer_opengl/gl_blit_screen.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -120,7 +122,15 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
}
blit_screen = std::make_unique<BlitScreen>(rasterizer, device_memory, state_tracker,
- program_manager, device);
+ program_manager, device, PresentFiltersForDisplay);
+ blit_applet =
+ std::make_unique<BlitScreen>(rasterizer, device_memory, state_tracker, program_manager,
+ device, PresentFiltersForAppletCapture);
+ capture_framebuffer.Create();
+ capture_renderbuffer.Create();
+ glBindRenderbuffer(GL_RENDERBUFFER, capture_renderbuffer.handle);
+ glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, VideoCore::Capture::LinearWidth,
+ VideoCore::Capture::LinearHeight);
}
RendererOpenGL::~RendererOpenGL() = default;
@@ -130,10 +140,11 @@ void RendererOpenGL::Composite(std::span<const Tegra::FramebufferConfig> framebu
return;
}
+ RenderAppletCaptureLayer(framebuffers);
RenderScreenshot(framebuffers);
state_tracker.BindFramebuffer(0);
- blit_screen->DrawScreen(framebuffers, emu_window.GetFramebufferLayout());
+ blit_screen->DrawScreen(framebuffers, emu_window.GetFramebufferLayout(), false);
++m_current_frame;
@@ -159,11 +170,8 @@ void RendererOpenGL::AddTelemetryFields() {
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
}
-void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) {
- if (!renderer_settings.screenshot_requested) {
- return;
- }
-
+void RendererOpenGL::RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers,
+ const Layout::FramebufferLayout& layout, void* dst) {
GLint old_read_fb;
GLint old_draw_fb;
glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
@@ -173,29 +181,86 @@ void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig>
screenshot_framebuffer.Create();
glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
- const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
-
GLuint renderbuffer;
glGenRenderbuffers(1, &renderbuffer);
glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, layout.width, layout.height);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
- blit_screen->DrawScreen(framebuffers, layout);
+ blit_screen->DrawScreen(framebuffers, layout, false);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
- glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
- renderer_settings.screenshot_bits);
+ glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, dst);
screenshot_framebuffer.Release();
glDeleteRenderbuffers(1, &renderbuffer);
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
+}
+
+void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) {
+ if (!renderer_settings.screenshot_requested) {
+ return;
+ }
+
+ RenderToBuffer(framebuffers, renderer_settings.screenshot_framebuffer_layout,
+ renderer_settings.screenshot_bits);
renderer_settings.screenshot_complete_callback(true);
renderer_settings.screenshot_requested = false;
}
+void RendererOpenGL::RenderAppletCaptureLayer(
+ std::span<const Tegra::FramebufferConfig> framebuffers) {
+ GLint old_read_fb;
+ GLint old_draw_fb;
+ glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
+
+ glBindFramebuffer(GL_FRAMEBUFFER, capture_framebuffer.handle);
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+ capture_renderbuffer.handle);
+
+ blit_applet->DrawScreen(framebuffers, VideoCore::Capture::Layout, true);
+
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
+}
+
+std::vector<u8> RendererOpenGL::GetAppletCaptureBuffer() {
+ using namespace VideoCore::Capture;
+
+ std::vector<u8> linear(TiledSize);
+ std::vector<u8> out(TiledSize);
+
+ GLint old_read_fb;
+ GLint old_draw_fb;
+ GLint old_pixel_pack_buffer;
+ GLint old_pack_row_length;
+ glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
+ glGetIntegerv(GL_PIXEL_PACK_BUFFER_BINDING, &old_pixel_pack_buffer);
+ glGetIntegerv(GL_PACK_ROW_LENGTH, &old_pack_row_length);
+
+ glBindFramebuffer(GL_FRAMEBUFFER, capture_framebuffer.handle);
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+ capture_renderbuffer.handle);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+ glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+ glReadPixels(0, 0, LinearWidth, LinearHeight, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
+ linear.data());
+
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, old_pixel_pack_buffer);
+ glPixelStorei(GL_PACK_ROW_LENGTH, old_pack_row_length);
+
+ Tegra::Texture::SwizzleTexture(out, linear, BytesPerPixel, LinearWidth, LinearHeight,
+ LinearDepth, BlockHeight, BlockDepth);
+
+ return out;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index c4625c96e..60d6a1477 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -42,6 +42,8 @@ public:
void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override;
+ std::vector<u8> GetAppletCaptureBuffer() override;
+
VideoCore::RasterizerInterface* ReadRasterizer() override {
return &rasterizer;
}
@@ -52,7 +54,11 @@ public:
private:
void AddTelemetryFields();
+
+ void RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers,
+ const Layout::FramebufferLayout& layout, void* dst);
void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers);
+ void RenderAppletCaptureLayer(std::span<const Tegra::FramebufferConfig> framebuffers);
Core::TelemetrySession& telemetry_session;
Core::Frontend::EmuWindow& emu_window;
@@ -64,8 +70,11 @@ private:
ProgramManager program_manager;
RasterizerOpenGL rasterizer;
OGLFramebuffer screenshot_framebuffer;
+ OGLFramebuffer capture_framebuffer;
+ OGLRenderbuffer capture_renderbuffer;
std::unique_ptr<BlitScreen> blit_screen;
+ std::unique_ptr<BlitScreen> blit_applet;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/present/layer.cpp b/src/video_core/renderer_vulkan/present/layer.cpp
index cfc04be44..3847a9a13 100644
--- a/src/video_core/renderer_vulkan/present/layer.cpp
+++ b/src/video_core/renderer_vulkan/present/layer.cpp
@@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+#include "video_core/present.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "common/settings.h"
@@ -48,12 +49,12 @@ VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
Layer::Layer(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
Tegra::MaxwellDeviceMemoryManager& device_memory_, size_t image_count_,
- VkExtent2D output_size, VkDescriptorSetLayout layout)
+ VkExtent2D output_size, VkDescriptorSetLayout layout, const PresentFilters& filters_)
: device(device_), memory_allocator(memory_allocator_), scheduler(scheduler_),
- device_memory(device_memory_), image_count(image_count_) {
+ device_memory(device_memory_), filters(filters_), image_count(image_count_) {
CreateDescriptorPool();
CreateDescriptorSets(layout);
- if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) {
+ if (filters.get_scaling_filter() == Settings::ScalingFilter::Fsr) {
CreateFSR(output_size);
}
}
@@ -171,11 +172,11 @@ void Layer::RefreshResources(const Tegra::FramebufferConfig& framebuffer) {
}
void Layer::SetAntiAliasPass() {
- if (anti_alias && anti_alias_setting == Settings::values.anti_aliasing.GetValue()) {
+ if (anti_alias && anti_alias_setting == filters.get_anti_aliasing()) {
return;
}
- anti_alias_setting = Settings::values.anti_aliasing.GetValue();
+ anti_alias_setting = filters.get_anti_aliasing();
const VkExtent2D render_area{
.width = Settings::values.resolution_info.ScaleUp(raw_width),
@@ -270,9 +271,11 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
const u64 linear_size{GetSizeInBytes(framebuffer)};
const u64 tiled_size{Tegra::Texture::CalculateSize(
true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
- Tegra::Texture::UnswizzleTexture(
- mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
- bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
+ if (host_ptr) {
+ Tegra::Texture::UnswizzleTexture(
+ mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
+ bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
+ }
const VkBufferImageCopy copy{
.bufferOffset = image_offset,
diff --git a/src/video_core/renderer_vulkan/present/layer.h b/src/video_core/renderer_vulkan/present/layer.h
index 88d43fc5f..f5effdcd7 100644
--- a/src/video_core/renderer_vulkan/present/layer.h
+++ b/src/video_core/renderer_vulkan/present/layer.h
@@ -11,6 +11,8 @@ namespace Layout {
struct FramebufferLayout;
}
+struct PresentFilters;
+
namespace Tegra {
struct FramebufferConfig;
}
@@ -37,7 +39,8 @@ class Layer final {
public:
explicit Layer(const Device& device, MemoryAllocator& memory_allocator, Scheduler& scheduler,
Tegra::MaxwellDeviceMemoryManager& device_memory, size_t image_count,
- VkExtent2D output_size, VkDescriptorSetLayout layout);
+ VkExtent2D output_size, VkDescriptorSetLayout layout,
+ const PresentFilters& filters);
~Layer();
void ConfigureDraw(PresentPushConstants* out_push_constants,
@@ -71,6 +74,7 @@ private:
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
Tegra::MaxwellDeviceMemoryManager& device_memory;
+ const PresentFilters& filters;
const size_t image_count{};
vk::DescriptorPool descriptor_pool{};
vk::DescriptorSets descriptor_sets{};
diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp
index 6ee16595d..7f27c7c1b 100644
--- a/src/video_core/renderer_vulkan/present/util.cpp
+++ b/src/video_core/renderer_vulkan/present/util.cpp
@@ -362,10 +362,10 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device,
});
}
-vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass,
- vk::PipelineLayout& layout,
- std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders,
- bool enable_blending) {
+static vk::Pipeline CreateWrappedPipelineImpl(
+ const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
+ std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders,
+ VkPipelineColorBlendAttachmentState blending) {
const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
@@ -443,30 +443,6 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
.alphaToOneEnable = VK_FALSE,
};
- constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_disabled{
- .blendEnable = VK_FALSE,
- .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
- .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
- .colorBlendOp = VK_BLEND_OP_ADD,
- .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
- .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
- .alphaBlendOp = VK_BLEND_OP_ADD,
- .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
- };
-
- constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_enabled{
- .blendEnable = VK_TRUE,
- .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
- .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
- .colorBlendOp = VK_BLEND_OP_ADD,
- .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
- .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
- .alphaBlendOp = VK_BLEND_OP_ADD,
- .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
- };
-
const VkPipelineColorBlendStateCreateInfo color_blend_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.pNext = nullptr,
@@ -474,8 +450,7 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = 1,
- .pAttachments =
- enable_blending ? &color_blend_attachment_enabled : &color_blend_attachment_disabled,
+ .pAttachments = &blending,
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
};
@@ -515,6 +490,63 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp
});
}
+vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass,
+ vk::PipelineLayout& layout,
+ std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) {
+ constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_disabled{
+ .blendEnable = VK_FALSE,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ };
+
+ return CreateWrappedPipelineImpl(device, renderpass, layout, shaders,
+ color_blend_attachment_disabled);
+}
+
+vk::Pipeline CreateWrappedPremultipliedBlendingPipeline(
+ const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
+ std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) {
+ constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_premultiplied{
+ .blendEnable = VK_TRUE,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_ONE,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ };
+
+ return CreateWrappedPipelineImpl(device, renderpass, layout, shaders,
+ color_blend_attachment_premultiplied);
+}
+
+vk::Pipeline CreateWrappedCoverageBlendingPipeline(
+ const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
+ std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) {
+ constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_coverage{
+ .blendEnable = VK_TRUE,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ };
+
+ return CreateWrappedPipelineImpl(device, renderpass, layout, shaders,
+ color_blend_attachment_coverage);
+}
+
VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>& images,
VkSampler sampler, VkImageView view,
VkDescriptorSet set, u32 binding) {
diff --git a/src/video_core/renderer_vulkan/present/util.h b/src/video_core/renderer_vulkan/present/util.h
index 1104aaa15..5b22f0fa8 100644
--- a/src/video_core/renderer_vulkan/present/util.h
+++ b/src/video_core/renderer_vulkan/present/util.h
@@ -42,8 +42,13 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device,
vk::DescriptorSetLayout& layout);
vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass,
vk::PipelineLayout& layout,
- std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders,
- bool enable_blending = false);
+ std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders);
+vk::Pipeline CreateWrappedPremultipliedBlendingPipeline(
+ const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
+ std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders);
+vk::Pipeline CreateWrappedCoverageBlendingPipeline(
+ const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout,
+ std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders);
VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>& images,
VkSampler sampler, VkImageView view,
VkDescriptorSet set, u32 binding);
diff --git a/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp
index c5db0230d..22ffacf11 100644
--- a/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp
+++ b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp
@@ -22,7 +22,7 @@ WindowAdaptPass::WindowAdaptPass(const Device& device_, VkFormat frame_format,
CreatePipelineLayout();
CreateVertexShader();
CreateRenderPass(frame_format);
- CreatePipeline();
+ CreatePipelines();
}
WindowAdaptPass::~WindowAdaptPass() = default;
@@ -34,7 +34,6 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s
const VkFramebuffer host_framebuffer{*dst->framebuffer};
const VkRenderPass renderpass{*render_pass};
- const VkPipeline graphics_pipeline{*pipeline};
const VkPipelineLayout graphics_pipeline_layout{*pipeline_layout};
const VkExtent2D render_area{
.width = dst->width,
@@ -44,9 +43,23 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s
const size_t layer_count = configs.size();
std::vector<PresentPushConstants> push_constants(layer_count);
std::vector<VkDescriptorSet> descriptor_sets(layer_count);
+ std::vector<VkPipeline> graphics_pipelines(layer_count);
auto layer_it = layers.begin();
for (size_t i = 0; i < layer_count; i++) {
+ switch (configs[i].blending) {
+ case Tegra::BlendMode::Opaque:
+ default:
+ graphics_pipelines[i] = *opaque_pipeline;
+ break;
+ case Tegra::BlendMode::Premultiplied:
+ graphics_pipelines[i] = *premultiplied_pipeline;
+ break;
+ case Tegra::BlendMode::Coverage:
+ graphics_pipelines[i] = *coverage_pipeline;
+ break;
+ }
+
layer_it->ConfigureDraw(&push_constants[i], &descriptor_sets[i], rasterizer, *sampler,
image_index, configs[i], layout);
layer_it++;
@@ -77,8 +90,8 @@ void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, s
BeginRenderPass(cmdbuf, renderpass, host_framebuffer, render_area);
cmdbuf.ClearAttachments({clear_attachment}, {clear_rect});
- cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
for (size_t i = 0; i < layer_count; i++) {
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipelines[i]);
cmdbuf.PushConstants(graphics_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT,
push_constants[i]);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline_layout, 0,
@@ -129,9 +142,13 @@ void WindowAdaptPass::CreateRenderPass(VkFormat frame_format) {
render_pass = CreateWrappedRenderPass(device, frame_format, VK_IMAGE_LAYOUT_UNDEFINED);
}
-void WindowAdaptPass::CreatePipeline() {
- pipeline = CreateWrappedPipeline(device, render_pass, pipeline_layout,
- std::tie(vertex_shader, fragment_shader), false);
+void WindowAdaptPass::CreatePipelines() {
+ opaque_pipeline = CreateWrappedPipeline(device, render_pass, pipeline_layout,
+ std::tie(vertex_shader, fragment_shader));
+ premultiplied_pipeline = CreateWrappedPremultipliedBlendingPipeline(
+ device, render_pass, pipeline_layout, std::tie(vertex_shader, fragment_shader));
+ coverage_pipeline = CreateWrappedCoverageBlendingPipeline(
+ device, render_pass, pipeline_layout, std::tie(vertex_shader, fragment_shader));
}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/present/window_adapt_pass.h b/src/video_core/renderer_vulkan/present/window_adapt_pass.h
index 0e2edfc31..cf667a4fc 100644
--- a/src/video_core/renderer_vulkan/present/window_adapt_pass.h
+++ b/src/video_core/renderer_vulkan/present/window_adapt_pass.h
@@ -42,7 +42,7 @@ private:
void CreatePipelineLayout();
void CreateVertexShader();
void CreateRenderPass(VkFormat frame_format);
- void CreatePipeline();
+ void CreatePipelines();
private:
const Device& device;
@@ -52,7 +52,9 @@ private:
vk::ShaderModule vertex_shader;
vk::ShaderModule fragment_shader;
vk::RenderPass render_pass;
- vk::Pipeline pipeline;
+ vk::Pipeline opaque_pipeline;
+ vk::Pipeline premultiplied_pipeline;
+ vk::Pipeline coverage_pipeline;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 48a105327..d50417116 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -19,7 +19,9 @@
#include "core/core_timing.h"
#include "core/frontend/graphics_context.h"
#include "core/telemetry_session.h"
+#include "video_core/capture.h"
#include "video_core/gpu.h"
+#include "video_core/present.h"
#include "video_core/renderer_vulkan/present/util.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
@@ -38,6 +40,20 @@
namespace Vulkan {
namespace {
+
+constexpr VkExtent2D CaptureImageSize{
+ .width = VideoCore::Capture::LinearWidth,
+ .height = VideoCore::Capture::LinearHeight,
+};
+
+constexpr VkExtent3D CaptureImageExtent{
+ .width = VideoCore::Capture::LinearWidth,
+ .height = VideoCore::Capture::LinearHeight,
+ .depth = VideoCore::Capture::LinearDepth,
+};
+
+constexpr VkFormat CaptureFormat = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+
std::string GetReadableVersion(u32 version) {
return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
VK_VERSION_PATCH(version));
@@ -99,10 +115,15 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
render_window.GetFramebufferLayout().height),
present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain,
surface),
- blit_swapchain(device_memory, device, memory_allocator, present_manager, scheduler),
- blit_screenshot(device_memory, device, memory_allocator, present_manager, scheduler),
+ blit_swapchain(device_memory, device, memory_allocator, present_manager, scheduler,
+ PresentFiltersForDisplay),
+ blit_capture(device_memory, device, memory_allocator, present_manager, scheduler,
+ PresentFiltersForDisplay),
+ blit_applet(device_memory, device, memory_allocator, present_manager, scheduler,
+ PresentFiltersForAppletCapture),
rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker,
- scheduler) {
+ scheduler),
+ applet_frame() {
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
turbo_mode.emplace(instance, dld);
scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
@@ -125,6 +146,8 @@ void RendererVulkan::Composite(std::span<const Tegra::FramebufferConfig> framebu
SCOPE_EXIT({ render_window.OnFrameDisplayed(); });
+ RenderAppletCaptureLayer(framebuffers);
+
if (!render_window.IsShown()) {
return;
}
@@ -167,30 +190,20 @@ void RendererVulkan::Report() const {
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
}
-void Vulkan::RendererVulkan::RenderScreenshot(
- std::span<const Tegra::FramebufferConfig> framebuffers) {
- if (!renderer_settings.screenshot_requested) {
- return;
- }
-
- constexpr VkFormat ScreenshotFormat{VK_FORMAT_B8G8R8A8_UNORM};
- const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
-
+vk::Buffer RendererVulkan::RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers,
+ const Layout::FramebufferLayout& layout, VkFormat format,
+ VkDeviceSize buffer_size) {
auto frame = [&]() {
Frame f{};
- f.image = CreateWrappedImage(memory_allocator, VkExtent2D{layout.width, layout.height},
- ScreenshotFormat);
- f.image_view = CreateWrappedImageView(device, f.image, ScreenshotFormat);
- f.framebuffer = blit_screenshot.CreateFramebuffer(layout, *f.image_view, ScreenshotFormat);
+ f.image =
+ CreateWrappedImage(memory_allocator, VkExtent2D{layout.width, layout.height}, format);
+ f.image_view = CreateWrappedImageView(device, f.image, format);
+ f.framebuffer = blit_capture.CreateFramebuffer(layout, *f.image_view, format);
return f;
}();
- blit_screenshot.DrawToFrame(rasterizer, &frame, framebuffers, layout, 1,
- VK_FORMAT_B8G8R8A8_UNORM);
-
- const auto dst_buffer = CreateWrappedBuffer(
- memory_allocator, static_cast<VkDeviceSize>(layout.width * layout.height * 4),
- MemoryUsage::Download);
+ auto dst_buffer = CreateWrappedBuffer(memory_allocator, buffer_size, MemoryUsage::Download);
+ blit_capture.DrawToFrame(rasterizer, &frame, framebuffers, layout, 1, format);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
@@ -198,15 +211,68 @@ void Vulkan::RendererVulkan::RenderScreenshot(
VkExtent3D{layout.width, layout.height, 1});
});
- // Ensure the copy is fully completed before saving the screenshot
+ // Ensure the copy is fully completed before saving the capture
scheduler.Finish();
- // Copy backing image data to the QImage screenshot buffer
+ // Copy backing image data to the capture buffer
dst_buffer.Invalidate();
+ return dst_buffer;
+}
+
+void RendererVulkan::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) {
+ if (!renderer_settings.screenshot_requested) {
+ return;
+ }
+
+ const auto& layout{renderer_settings.screenshot_framebuffer_layout};
+ const auto dst_buffer = RenderToBuffer(framebuffers, layout, VK_FORMAT_B8G8R8A8_UNORM,
+ layout.width * layout.height * 4);
+
std::memcpy(renderer_settings.screenshot_bits, dst_buffer.Mapped().data(),
dst_buffer.Mapped().size());
renderer_settings.screenshot_complete_callback(false);
renderer_settings.screenshot_requested = false;
}
+std::vector<u8> RendererVulkan::GetAppletCaptureBuffer() {
+ using namespace VideoCore::Capture;
+
+ std::vector<u8> out(VideoCore::Capture::TiledSize);
+
+ if (!applet_frame.image) {
+ return out;
+ }
+
+ const auto dst_buffer =
+ CreateWrappedBuffer(memory_allocator, VideoCore::Capture::TiledSize, MemoryUsage::Download);
+
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([&](vk::CommandBuffer cmdbuf) {
+ DownloadColorImage(cmdbuf, *applet_frame.image, *dst_buffer, CaptureImageExtent);
+ });
+
+ // Ensure the copy is fully completed before writing the capture
+ scheduler.Finish();
+
+ // Swizzle image data to the capture buffer
+ dst_buffer.Invalidate();
+ Tegra::Texture::SwizzleTexture(out, dst_buffer.Mapped(), BytesPerPixel, LinearWidth,
+ LinearHeight, LinearDepth, BlockHeight, BlockDepth);
+
+ return out;
+}
+
+void RendererVulkan::RenderAppletCaptureLayer(
+ std::span<const Tegra::FramebufferConfig> framebuffers) {
+ if (!applet_frame.image) {
+ applet_frame.image = CreateWrappedImage(memory_allocator, CaptureImageSize, CaptureFormat);
+ applet_frame.image_view = CreateWrappedImageView(device, applet_frame.image, CaptureFormat);
+ applet_frame.framebuffer = blit_applet.CreateFramebuffer(
+ VideoCore::Capture::Layout, *applet_frame.image_view, CaptureFormat);
+ }
+
+ blit_applet.DrawToFrame(rasterizer, &applet_frame, framebuffers, VideoCore::Capture::Layout, 1,
+ CaptureFormat);
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index c6d8a0f21..fb9d83412 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -48,6 +48,8 @@ public:
void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override;
+ std::vector<u8> GetAppletCaptureBuffer() override;
+
VideoCore::RasterizerInterface* ReadRasterizer() override {
return &rasterizer;
}
@@ -59,7 +61,11 @@ public:
private:
void Report() const;
+ vk::Buffer RenderToBuffer(std::span<const Tegra::FramebufferConfig> framebuffers,
+ const Layout::FramebufferLayout& layout, VkFormat format,
+ VkDeviceSize buffer_size);
void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers);
+ void RenderAppletCaptureLayer(std::span<const Tegra::FramebufferConfig> framebuffers);
Core::TelemetrySession& telemetry_session;
Tegra::MaxwellDeviceMemoryManager& device_memory;
@@ -79,9 +85,12 @@ private:
Swapchain swapchain;
PresentManager present_manager;
BlitScreen blit_swapchain;
- BlitScreen blit_screenshot;
+ BlitScreen blit_capture;
+ BlitScreen blit_applet;
RasterizerVulkan rasterizer;
std::optional<TurboMode> turbo_mode;
+
+ Frame applet_frame;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 2275fcc46..b7797f833 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/framebuffer_config.h"
+#include "video_core/present.h"
#include "video_core/renderer_vulkan/present/filters.h"
#include "video_core/renderer_vulkan/present/layer.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
@@ -12,9 +13,9 @@ namespace Vulkan {
BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_,
MemoryAllocator& memory_allocator_, PresentManager& present_manager_,
- Scheduler& scheduler_)
+ Scheduler& scheduler_, const PresentFilters& filters_)
: device_memory{device_memory_}, device{device_}, memory_allocator{memory_allocator_},
- present_manager{present_manager_}, scheduler{scheduler_}, image_count{1},
+ present_manager{present_manager_}, scheduler{scheduler_}, filters{filters_}, image_count{1},
swapchain_view_format{VK_FORMAT_B8G8R8A8_UNORM} {}
BlitScreen::~BlitScreen() = default;
@@ -27,7 +28,7 @@ void BlitScreen::WaitIdle() {
void BlitScreen::SetWindowAdaptPass() {
layers.clear();
- scaling_filter = Settings::values.scaling_filter.GetValue();
+ scaling_filter = filters.get_scaling_filter();
switch (scaling_filter) {
case Settings::ScalingFilter::NearestNeighbor:
@@ -59,7 +60,7 @@ void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame,
bool presentation_recreate_required = false;
// Recreate dynamic resources if the adapting filter changed
- if (!window_adapt || scaling_filter != Settings::values.scaling_filter.GetValue()) {
+ if (!window_adapt || scaling_filter != filters.get_scaling_filter()) {
resource_update_required = true;
}
@@ -102,7 +103,7 @@ void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame,
while (layers.size() < framebuffers.size()) {
layers.emplace_back(device, memory_allocator, scheduler, device_memory, image_count,
- window_size, window_adapt->GetDescriptorSetLayout());
+ window_size, window_adapt->GetDescriptorSetLayout(), filters);
}
// Perform the draw
@@ -119,8 +120,7 @@ vk::Framebuffer BlitScreen::CreateFramebuffer(const Layout::FramebufferLayout& l
VkFormat current_view_format) {
const bool format_updated =
std::exchange(swapchain_view_format, current_view_format) != current_view_format;
- if (!window_adapt || scaling_filter != Settings::values.scaling_filter.GetValue() ||
- format_updated) {
+ if (!window_adapt || scaling_filter != filters.get_scaling_filter() || format_updated) {
WaitIdle();
SetWindowAdaptPass();
}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index cbdf2d5d0..531c57fc5 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -16,6 +16,8 @@ namespace Core {
class System;
}
+struct PresentFilters;
+
namespace Tegra {
struct FramebufferConfig;
}
@@ -47,7 +49,7 @@ class BlitScreen {
public:
explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, const Device& device,
MemoryAllocator& memory_allocator, PresentManager& present_manager,
- Scheduler& scheduler);
+ Scheduler& scheduler, const PresentFilters& filters);
~BlitScreen();
void DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame,
@@ -70,6 +72,7 @@ private:
MemoryAllocator& memory_allocator;
PresentManager& present_manager;
Scheduler& scheduler;
+ const PresentFilters& filters;
std::size_t image_count{};
std::size_t image_index{};
VkFormat swapchain_view_format{};
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 31001d142..e5e1e3ab6 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -368,7 +368,7 @@ u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
return static_cast<u32>(device.GetStorageBufferAlignment());
}
-void BufferCacheRuntime::TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept {
+void BufferCacheRuntime::TickFrame(Common::SlotVector<Buffer>& slot_buffers) noexcept {
for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
it->ResetUsageTracking();
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index e273f4988..efe960258 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -81,7 +81,7 @@ public:
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool);
- void TickFrame(VideoCommon::SlotVector<Buffer>& slot_buffers) noexcept;
+ void TickFrame(Common::SlotVector<Buffer>& slot_buffers) noexcept;
void Finish();
@@ -181,7 +181,6 @@ struct BufferCacheParams {
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
- static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;
};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 0dbde65d6..aaeb5ef93 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -20,11 +20,11 @@ struct ResolutionScalingInfo;
namespace Vulkan {
+using Common::SlotVector;
using VideoCommon::ImageId;
using VideoCommon::NUM_RT;
using VideoCommon::Region2D;
using VideoCommon::RenderTargets;
-using VideoCommon::SlotVector;
using VideoCore::Surface::PixelFormat;
class BlitImageHelper;
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
deleted file mode 100644
index 3ffa2a661..000000000
--- a/src/video_core/texture_cache/slot_vector.h
+++ /dev/null
@@ -1,227 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#pragma once
-
-#include <algorithm>
-#include <bit>
-#include <numeric>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/polyfill_ranges.h"
-
-namespace VideoCommon {
-
-struct SlotId {
- static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
-
- constexpr auto operator<=>(const SlotId&) const noexcept = default;
-
- constexpr explicit operator bool() const noexcept {
- return index != INVALID_INDEX;
- }
-
- u32 index = INVALID_INDEX;
-};
-
-template <class T>
- requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T>
-class SlotVector {
-public:
- class Iterator {
- friend SlotVector<T>;
-
- public:
- constexpr Iterator() = default;
-
- Iterator& operator++() noexcept {
- const u64* const bitset = slot_vector->stored_bitset.data();
- const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
- if (id.index < size) {
- do {
- ++id.index;
- } while (id.index < size && !IsValid(bitset));
- if (id.index == size) {
- id.index = SlotId::INVALID_INDEX;
- }
- }
- return *this;
- }
-
- Iterator operator++(int) noexcept {
- const Iterator copy{*this};
- ++*this;
- return copy;
- }
-
- bool operator==(const Iterator& other) const noexcept {
- return id.index == other.id.index;
- }
-
- bool operator!=(const Iterator& other) const noexcept {
- return id.index != other.id.index;
- }
-
- std::pair<SlotId, T*> operator*() const noexcept {
- return {id, std::addressof((*slot_vector)[id])};
- }
-
- T* operator->() const noexcept {
- return std::addressof((*slot_vector)[id]);
- }
-
- private:
- Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
- : slot_vector{slot_vector_}, id{id_} {}
-
- bool IsValid(const u64* bitset) const noexcept {
- return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
- }
-
- SlotVector<T>* slot_vector;
- SlotId id;
- };
-
- ~SlotVector() noexcept {
- size_t index = 0;
- for (u64 bits : stored_bitset) {
- for (size_t bit = 0; bits; ++bit, bits >>= 1) {
- if ((bits & 1) != 0) {
- values[index + bit].object.~T();
- }
- }
- index += 64;
- }
- delete[] values;
- }
-
- [[nodiscard]] T& operator[](SlotId id) noexcept {
- ValidateIndex(id);
- return values[id.index].object;
- }
-
- [[nodiscard]] const T& operator[](SlotId id) const noexcept {
- ValidateIndex(id);
- return values[id.index].object;
- }
-
- template <typename... Args>
- [[nodiscard]] SlotId insert(Args&&... args) noexcept {
- const u32 index = FreeValueIndex();
- new (&values[index].object) T(std::forward<Args>(args)...);
- SetStorageBit(index);
-
- return SlotId{index};
- }
-
- void erase(SlotId id) noexcept {
- values[id.index].object.~T();
- free_list.push_back(id.index);
- ResetStorageBit(id.index);
- }
-
- [[nodiscard]] Iterator begin() noexcept {
- const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
- if (it == stored_bitset.end()) {
- return end();
- }
- const u32 word_index = static_cast<u32>(std::distance(it, stored_bitset.begin()));
- const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
- return Iterator(this, first_id);
- }
-
- [[nodiscard]] Iterator end() noexcept {
- return Iterator(this, SlotId{SlotId::INVALID_INDEX});
- }
-
- [[nodiscard]] size_t size() const noexcept {
- return values_capacity - free_list.size();
- }
-
-private:
- struct NonTrivialDummy {
- NonTrivialDummy() noexcept {}
- };
-
- union Entry {
- Entry() noexcept : dummy{} {}
- ~Entry() noexcept {}
-
- NonTrivialDummy dummy;
- T object;
- };
-
- void SetStorageBit(u32 index) noexcept {
- stored_bitset[index / 64] |= u64(1) << (index % 64);
- }
-
- void ResetStorageBit(u32 index) noexcept {
- stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
- }
-
- bool ReadStorageBit(u32 index) noexcept {
- return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
- }
-
- void ValidateIndex(SlotId id) const noexcept {
- DEBUG_ASSERT(id);
- DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
- DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
- }
-
- [[nodiscard]] u32 FreeValueIndex() noexcept {
- if (free_list.empty()) {
- Reserve(values_capacity ? (values_capacity << 1) : 1);
- }
- const u32 free_index = free_list.back();
- free_list.pop_back();
- return free_index;
- }
-
- void Reserve(size_t new_capacity) noexcept {
- Entry* const new_values = new Entry[new_capacity];
- size_t index = 0;
- for (u64 bits : stored_bitset) {
- for (size_t bit = 0; bits; ++bit, bits >>= 1) {
- const size_t i = index + bit;
- if ((bits & 1) == 0) {
- continue;
- }
- T& old_value = values[i].object;
- new (&new_values[i].object) T(std::move(old_value));
- old_value.~T();
- }
- index += 64;
- }
-
- stored_bitset.resize((new_capacity + 63) / 64);
-
- const size_t old_free_size = free_list.size();
- free_list.resize(old_free_size + (new_capacity - values_capacity));
- std::iota(free_list.begin() + old_free_size, free_list.end(),
- static_cast<u32>(values_capacity));
-
- delete[] values;
- values = new_values;
- values_capacity = new_capacity;
- }
-
- Entry* values = nullptr;
- size_t values_capacity = 0;
-
- std::vector<u64> stored_bitset;
- std::vector<u32> free_list;
-};
-
-} // namespace VideoCommon
-
-template <>
-struct std::hash<VideoCommon::SlotId> {
- size_t operator()(const VideoCommon::SlotId& id) const noexcept {
- return std::hash<u32>{}(id.index);
- }
-};
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index a20c956ff..3a1cc060e 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -746,7 +746,13 @@ std::pair<typename P::ImageView*, bool> TextureCache<P>::TryFindFramebufferImage
}();
const auto GetImageViewForFramebuffer = [&](ImageId image_id) {
- const ImageViewInfo info{ImageViewType::e2D, view_format};
+ ImageViewInfo info{ImageViewType::e2D, view_format};
+ if (config.blending == Tegra::BlendMode::Opaque) {
+ info.x_source = static_cast<u8>(SwizzleSource::R);
+ info.y_source = static_cast<u8>(SwizzleSource::G);
+ info.z_source = static_cast<u8>(SwizzleSource::B);
+ info.w_source = static_cast<u8>(SwizzleSource::OneFloat);
+ }
return std::make_pair(&slot_image_views[FindOrEmplaceImageView(image_id, info)],
slot_images[image_id].IsRescaled());
};
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index e7b910121..da98a634b 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -21,6 +21,7 @@
#include "common/lru_cache.h"
#include "common/polyfill_ranges.h"
#include "common/scratch_buffer.h"
+#include "common/slot_vector.h"
#include "common/thread_worker.h"
#include "video_core/compatible_formats.h"
#include "video_core/control/channel_state_cache.h"
@@ -32,7 +33,6 @@
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/render_targets.h"
-#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
#include "video_core/textures/texture.h"
@@ -451,16 +451,16 @@ private:
struct PendingDownload {
bool is_swizzle;
size_t async_buffer_id;
- SlotId object_id;
+ Common::SlotId object_id;
};
- SlotVector<Image> slot_images;
- SlotVector<ImageMapView> slot_map_views;
- SlotVector<ImageView> slot_image_views;
- SlotVector<ImageAlloc> slot_image_allocs;
- SlotVector<Sampler> slot_samplers;
- SlotVector<Framebuffer> slot_framebuffers;
- SlotVector<BufferDownload> slot_buffer_downloads;
+ Common::SlotVector<Image> slot_images;
+ Common::SlotVector<ImageMapView> slot_map_views;
+ Common::SlotVector<ImageView> slot_image_views;
+ Common::SlotVector<ImageAlloc> slot_image_allocs;
+ Common::SlotVector<Sampler> slot_samplers;
+ Common::SlotVector<Framebuffer> slot_framebuffers;
+ Common::SlotVector<BufferDownload> slot_buffer_downloads;
// TODO: This data structure is not optimal and it should be reworked
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
index 0453456b4..07c304386 100644
--- a/src/video_core/texture_cache/types.h
+++ b/src/video_core/texture_cache/types.h
@@ -5,21 +5,21 @@
#include "common/common_funcs.h"
#include "common/common_types.h"
-#include "video_core/texture_cache/slot_vector.h"
+#include "common/slot_vector.h"
namespace VideoCommon {
constexpr size_t NUM_RT = 8;
constexpr size_t MAX_MIP_LEVELS = 14;
-constexpr SlotId CORRUPT_ID{0xfffffffe};
+constexpr Common::SlotId CORRUPT_ID{0xfffffffe};
-using ImageId = SlotId;
-using ImageMapId = SlotId;
-using ImageViewId = SlotId;
-using ImageAllocId = SlotId;
-using SamplerId = SlotId;
-using FramebufferId = SlotId;
+using ImageId = Common::SlotId;
+using ImageMapId = Common::SlotId;
+using ImageViewId = Common::SlotId;
+using ImageAllocId = Common::SlotId;
+using SamplerId = Common::SlotId;
+using FramebufferId = Common::SlotId;
/// Fake image ID for null image views
constexpr ImageId NULL_IMAGE_ID{0};