Diffstat (limited to 'src/video_core')
21 files changed, 203 insertions, 183 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3b2fe01da..7f79111e0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -274,6 +274,7 @@ add_library(video_core STATIC vulkan_common/vulkan_wrapper.h vulkan_common/nsight_aftermath_tracker.cpp vulkan_common/nsight_aftermath_tracker.h + vulkan_common/vma.cpp ) create_target_directory_groups(video_core) @@ -291,7 +292,7 @@ target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) -target_link_libraries(video_core PRIVATE sirit Vulkan::Headers vma) +target_link_libraries(video_core PRIVATE sirit Vulkan::Headers GPUOpen::VulkanMemoryAllocator) if (ENABLE_NSIGHT_AFTERMATH) if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK}) @@ -324,6 +325,9 @@ else() # xbyak set_source_files_properties(macro/macro_jit_x64.cpp PROPERTIES COMPILE_OPTIONS "-Wno-conversion;-Wno-shadow") + + # VMA + set_source_files_properties(vulkan_common/vma.cpp PROPERTIES COMPILE_OPTIONS "-Wno-conversion;-Wno-unused-variable;-Wno-unused-parameter;-Wno-missing-field-initializers") endif() if (ARCHITECTURE_x86_64) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b5ed3380f..6ed4b78f2 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -234,9 +234,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am if (has_new_downloads) { memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); } - tmp_buffer.resize_destructive(amount); - cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); - cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); + + Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp( + cpu_memory, *cpu_src_address, amount, &tmp_buffer); + tmp.SetAddressAndSize(*cpu_dest_address, amount); return true; } diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 551929824..9f1b340a9 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -5,6 +5,7 @@ #include "common/microprofile.h" #include "common/settings.h" #include "core/core.h" +#include "core/memory.h" #include "video_core/dma_pusher.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" @@ -12,6 +13,8 @@ namespace Tegra { +constexpr u32 MacroRegistersStart = 0xE00; + DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, Control::ChannelState& channel_state_) : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_, @@ -74,25 +77,16 @@ bool DmaPusher::Step() { } // Push buffer non-empty, read a word - command_headers.resize_destructive(command_list_header.size); - constexpr u32 MacroRegistersStart = 0xE00; - if (dma_state.method < MacroRegistersStart) { - if (Settings::IsGPULevelHigh()) { - memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), - command_list_header.size * sizeof(u32)); - } else { - memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), - command_list_header.size * sizeof(u32)); - } - } else { - const size_t copy_size = command_list_header.size * sizeof(u32); + if (dma_state.method >= MacroRegistersStart) { if (subchannels[dma_state.subchannel]) { - subchannels[dma_state.subchannel]->current_dirty = - memory_manager.IsMemoryDirty(dma_state.dma_get, 
copy_size); + subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty( + dma_state.dma_get, command_list_header.size * sizeof(u32)); } - memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size); } - ProcessCommands(command_headers); + Core::Memory::GpuGuestMemory<Tegra::CommandHeader, + Core::Memory::GuestMemoryFlags::UnsafeRead> + headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers); + ProcessCommands(headers); } return true; diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 7f5a0c29d..bc64d4486 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -5,6 +5,7 @@ #include "common/algorithm.h" #include "common/assert.h" +#include "core/memory.h" #include "video_core/engines/engine_upload.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -46,15 +47,11 @@ void State::ProcessData(const u32* data, size_t num_data) { void State::ProcessData(std::span<const u8> read_buffer) { const GPUVAddr address{regs.dest.Address()}; if (is_linear) { - if (regs.line_count == 1) { - rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer); - } else { - for (size_t line = 0; line < regs.line_count; ++line) { - const GPUVAddr dest_line = address + line * regs.dest.pitch; - std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in, - regs.line_length_in); - rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer); - } + for (size_t line = 0; line < regs.line_count; ++line) { + const GPUVAddr dest_line = address + line * regs.dest.pitch; + std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in, + regs.line_length_in); + rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer); } } else { u32 width = regs.dest.width; @@ -70,13 +67,14 @@ void State::ProcessData(std::span<const u8> read_buffer) { const std::size_t dst_size = Tegra::Texture::CalculateSize( true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, regs.dest.BlockHeight(), regs.dest.BlockDepth()); - tmp_buffer.resize_destructive(dst_size); - memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); - Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width, - regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, - x_elements, regs.line_count, regs.dest.BlockHeight(), + + Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> + tmp(memory_manager, address, dst_size, &tmp_buffer); + + Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, + regs.dest.depth, x_offset, regs.dest.y, x_elements, + regs.line_count, regs.dest.BlockHeight(), regs.dest.BlockDepth(), regs.line_length_in); - memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size); } } diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 601095f03..a38d9528a 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -84,7 +84,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); - return tic_entry; } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 62d70e9f3..c3696096d 100644 --- 
a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -9,6 +9,7 @@ #include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/memory.h" #include "video_core/dirty_flags.h" #include "video_core/engines/draw_manager.h" #include "video_core/engines/maxwell_3d.h" @@ -679,17 +680,14 @@ void Maxwell3D::ProcessCBData(u32 value) { Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { const GPUVAddr tic_address_gpu{regs.tex_header.Address() + tic_index * sizeof(Texture::TICEntry)}; - Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); - return tic_entry; } Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { const GPUVAddr tsc_address_gpu{regs.tex_sampler.Address() + tsc_index * sizeof(Texture::TSCEntry)}; - Texture::TSCEntry tsc_entry; memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index f8598fd98..cd8e24b0b 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -7,6 +7,7 @@ #include "common/microprofile.h" #include "common/settings.h" #include "core/core.h" +#include "core/memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/memory_manager.h" @@ -130,11 +131,12 @@ void MaxwellDMA::Launch() { UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); read_buffer.resize_destructive(16); for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { - memory_manager.ReadBlock( - convert_linear_2_blocklinear_addr(regs.offset_in + offset), - read_buffer.data(), read_buffer.size()); - memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(), - read_buffer.size()); + Core::Memory::GpuGuestMemoryScoped< + u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> + tmp_write_buffer(memory_manager, + convert_linear_2_blocklinear_addr(regs.offset_in + offset), + 16, &read_buffer); + tmp_write_buffer.SetAddressAndSize(regs.offset_out + offset, 16); } } else if (is_src_pitch && !is_dst_pitch) { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); @@ -142,20 +144,19 @@ void MaxwellDMA::Launch() { UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); read_buffer.resize_destructive(16); for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { - memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), - read_buffer.size()); - memory_manager.WriteBlockCached( - convert_linear_2_blocklinear_addr(regs.offset_out + offset), - read_buffer.data(), read_buffer.size()); + Core::Memory::GpuGuestMemoryScoped< + u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> + tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); + tmp_write_buffer.SetAddressAndSize( + convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); } } else { if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { - read_buffer.resize_destructive(regs.line_length_in); - memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), - regs.line_length_in, - VideoCommon::CacheType::NoBufferCache); - memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(), - regs.line_length_in); + Core::Memory::GpuGuestMemoryScoped< + u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> + tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, + 
&read_buffer); + tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); } } } @@ -222,17 +223,15 @@ void MaxwellDMA::CopyBlockLinearToPitch() { CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); const size_t dst_size = dst_operand.pitch * regs.line_count; - read_buffer.resize_destructive(src_size); - write_buffer.resize_destructive(dst_size); - memory_manager.ReadBlock(src_operand.address, read_buffer.data(), src_size); - memory_manager.ReadBlock(dst_operand.address, write_buffer.data(), dst_size); + Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( + memory_manager, src_operand.address, src_size, &read_buffer); + Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> + tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); - UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, - src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, - dst_operand.pitch); - - memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); + UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, + x_offset, src_params.origin.y, x_elements, regs.line_count, block_height, + block_depth, dst_operand.pitch); } void MaxwellDMA::CopyPitchToBlockLinear() { @@ -287,18 +286,17 @@ void MaxwellDMA::CopyPitchToBlockLinear() { CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; - read_buffer.resize_destructive(src_size); - write_buffer.resize_destructive(dst_size); - - memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); - memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); - - // If the input is linear and the output is tiled, swizzle the input and copy it over. - SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, - dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, - regs.pitch_in); - - memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); + GPUVAddr src_addr = regs.offset_in; + GPUVAddr dst_addr = regs.offset_out; + Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( + memory_manager, src_addr, src_size, &read_buffer); + Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> + tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); + + // If the input is linear and the output is tiled, swizzle the input and copy it over. 
+ SwizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, + x_offset, dst_params.origin.y, x_elements, regs.line_count, block_height, + block_depth, regs.pitch_in); } void MaxwellDMA::CopyBlockLinearToBlockLinear() { @@ -342,23 +340,20 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { const u32 pitch = x_elements * bytes_per_pixel; const size_t mid_buffer_size = pitch * regs.line_count; - read_buffer.resize_destructive(src_size); - write_buffer.resize_destructive(dst_size); - intermediate_buffer.resize_destructive(mid_buffer_size); - memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); - memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); + Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( + memory_manager, regs.offset_in, src_size, &read_buffer); + Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> + tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); - UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height, + UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, src.block_size.height, src.block_size.depth, pitch); - SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, + SwizzleSubrect(tmp_write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, dst.block_size.height, dst.block_size.depth, pitch); - - memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::ReleaseSemaphore() { diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp index ff88cd03d..3a599f466 100644 --- a/src/video_core/engines/sw_blitter/blitter.cpp +++ b/src/video_core/engines/sw_blitter/blitter.cpp @@ -159,11 +159,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); const size_t src_size = get_surface_size(src, src_bytes_per_pixel); - impl->tmp_buffer.resize_destructive(src_size); - memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size); - const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; + Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( + memory_manager, src.Address(), src_size, &impl->tmp_buffer); + const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; impl->src_buffer.resize_destructive(src_copy_size); @@ -200,12 +200,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, impl->dst_buffer.resize_destructive(dst_copy_size); if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { - UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width, - src.height, src.depth, config.src_x0, config.src_y0, src_extent_x, - src_extent_y, src.block_height, src.block_depth, - src_extent_x * src_bytes_per_pixel); + UnswizzleSubrect(impl->src_buffer, tmp_buffer, src_bytes_per_pixel, src.width, src.height, + src.depth, config.src_x0, 
config.src_y0, src_extent_x, src_extent_y, + src.block_height, src.block_depth, src_extent_x * src_bytes_per_pixel); } else { - process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, + process_pitch_linear(false, tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); } @@ -221,20 +220,18 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, } const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); - impl->tmp_buffer.resize_destructive(dst_size); - memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); + Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite> + tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { - SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width, - dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, - dst_extent_y, dst.block_height, dst.block_depth, - dst_extent_x * dst_bytes_per_pixel); + SwizzleSubrect(tmp_buffer2, impl->dst_buffer, dst_bytes_per_pixel, dst.width, dst.height, + dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, dst_extent_y, + dst.block_height, dst.block_depth, dst_extent_x * dst_bytes_per_pixel); } else { - process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y, + process_pitch_linear(true, impl->dst_buffer, tmp_buffer2, dst_extent_x, dst_extent_y, dst.pitch, config.dst_x0, config.dst_y0, static_cast<size_t>(dst_bytes_per_pixel)); } - memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); return true; } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 45141e488..d16040613 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -10,13 +10,13 @@ #include "core/device_memory.h" #include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_process.h" -#include "core/memory.h" #include "video_core/invalidation_accumulator.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" namespace Tegra { +using Core::Memory::GuestMemoryFlags; std::atomic<size_t> MemoryManager::unique_identifier_generator{}; @@ -587,13 +587,10 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, VideoCommon::CacheType which) { - tmp_buffer.resize_destructive(size); - ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); - - // The output block must be flushed in case it has data modified from the GPU. 
- // Fixes NPC geometry in Zombie Panic in Wonderland DX + Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( + *this, gpu_src_addr, size); + data.SetAddressAndSize(gpu_dest_addr, size); FlushRegion(gpu_dest_addr, size, which); - WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which); } bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { @@ -758,4 +755,23 @@ void MemoryManager::FlushCaching() { accumulator->Clear(); } +const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { + auto cpu_addr = GpuToCpuAddress(src_addr); + if (cpu_addr) { + return memory.GetSpan(*cpu_addr, size); + } + return nullptr; +} + +u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) { + if (!IsContinuousRange(src_addr, size)) { + return nullptr; + } + auto cpu_addr = GpuToCpuAddress(src_addr); + if (cpu_addr) { + return memory.GetSpan(*cpu_addr, size); + } + return nullptr; +} + } // namespace Tegra diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 4202c26ff..9b311b9e5 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -15,6 +15,7 @@ #include "common/range_map.h" #include "common/scratch_buffer.h" #include "common/virtual_buffer.h" +#include "core/memory.h" #include "video_core/cache_types.h" #include "video_core/pte_kind.h" @@ -62,6 +63,20 @@ public: [[nodiscard]] u8* GetPointer(GPUVAddr addr); [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; + template <typename T> + [[nodiscard]] T* GetPointer(GPUVAddr addr) { + const auto address{GpuToCpuAddress(addr)}; + if (!address) { + return {}; + } + return memory.GetPointer(*address); + } + + template <typename T> + [[nodiscard]] const T* GetPointer(GPUVAddr addr) const { + return GetPointer<T*>(addr); + } + /** * ReadBlock and WriteBlock are full read and write operations over virtual * GPU Memory. 
It's important to use these when GPU memory may not be continuous @@ -139,6 +154,9 @@ public: void FlushCaching(); + const u8* GetSpan(const GPUVAddr src_addr, const std::size_t size) const; + u8* GetSpan(const GPUVAddr src_addr, const std::size_t size); + private: template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 2d3f58201..4002fa72b 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -38,8 +38,8 @@ void RendererBase::RequestScreenshot(void* data, std::function<void(bool)> callb LOG_ERROR(Render, "A screenshot is already requested or in progress, ignoring the request"); return; } - auto async_callback{[callback = std::move(callback)](bool invert_y) { - std::thread t{callback, invert_y}; + auto async_callback{[callback_ = std::move(callback)](bool invert_y) { + std::thread t{callback_, invert_y}; t.detach(); }}; renderer_settings.screenshot_bits = data; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 23a48c6fe..71f720c63 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -231,24 +231,25 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c } const bool in_parallel = thread_worker != nullptr; const auto backend = device.GetShaderBackend(); - auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv), + auto func{[this, sources_ = std::move(sources), sources_spirv_ = std::move(sources_spirv), shader_notify, backend, in_parallel, force_context_flush](ShaderContext::Context*) mutable { for (size_t stage = 0; stage < 5; ++stage) { switch (backend) { case Settings::ShaderBackend::GLSL: - if (!sources[stage].empty()) { - source_programs[stage] = CreateProgram(sources[stage], Stage(stage)); + if (!sources_[stage].empty()) { + source_programs[stage] = CreateProgram(sources_[stage], Stage(stage)); } break; case Settings::ShaderBackend::GLASM: - if (!sources[stage].empty()) { - assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); + if (!sources_[stage].empty()) { + assembly_programs[stage] = + CompileProgram(sources_[stage], AssemblyStage(stage)); } break; case Settings::ShaderBackend::SPIRV: - if (!sources_spirv[stage].empty()) { - source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage)); + if (!sources_spirv_[stage].empty()) { + source_programs[stage] = CreateProgram(sources_spirv_[stage], Stage(stage)); } break; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0329ed820..7e1d7f92e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -288,9 +288,9 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { ComputePipelineKey key; file.read(reinterpret_cast<char*>(&key), sizeof(key)); - queue_work([this, key, env = std::move(env), &state, &callback](Context* ctx) mutable { + queue_work([this, key, env_ = std::move(env), &state, &callback](Context* ctx) mutable { ctx->pools.ReleaseContents(); - auto 
pipeline{CreateComputePipeline(ctx->pools, key, env, true)}; + auto pipeline{CreateComputePipeline(ctx->pools, key, env_, true)}; std::scoped_lock lock{state.mutex}; if (pipeline) { compute_cache.emplace(key, std::move(pipeline)); @@ -305,9 +305,9 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { GraphicsPipelineKey key; file.read(reinterpret_cast<char*>(&key), sizeof(key)); - queue_work([this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable { + queue_work([this, key, envs_ = std::move(envs), &state, &callback](Context* ctx) mutable { boost::container::static_vector<Shader::Environment*, 5> env_ptrs; - for (auto& env : envs) { + for (auto& env : envs_) { env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 51df18ec3..f8cd2a5d8 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -206,8 +206,8 @@ public: const size_t sub_first_offset = static_cast<size_t>(first % 4) * GetQuadsNum(num_indices); const size_t offset = (sub_first_offset + GetQuadsNum(first)) * 6ULL * BytesPerIndex(index_type); - scheduler.Record([buffer = *buffer, index_type_, offset](vk::CommandBuffer cmdbuf) { - cmdbuf.BindIndexBuffer(buffer, offset, index_type_); + scheduler.Record([buffer_ = *buffer, index_type_, offset](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(buffer_, offset, index_type_); }); } @@ -528,17 +528,18 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi buffer_handles.push_back(handle); } if (device.IsExtExtendedDynamicStateSupported()) { - scheduler.Record([bindings = std::move(bindings), - buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers2EXT( - bindings.min_index, bindings.max_index - bindings.min_index, buffer_handles.data(), - bindings.offsets.data(), bindings.sizes.data(), bindings.strides.data()); + scheduler.Record([bindings_ = std::move(bindings), + buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers2EXT(bindings_.min_index, + bindings_.max_index - bindings_.min_index, + buffer_handles_.data(), bindings_.offsets.data(), + bindings_.sizes.data(), bindings_.strides.data()); }); } else { - scheduler.Record([bindings = std::move(bindings), - buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { - cmdbuf.BindVertexBuffers(bindings.min_index, bindings.max_index - bindings.min_index, - buffer_handles.data(), bindings.offsets.data()); + scheduler.Record([bindings_ = std::move(bindings), + buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + cmdbuf.BindVertexBuffers(bindings_.min_index, bindings_.max_index - bindings_.min_index, + buffer_handles_.data(), bindings_.offsets.data()); }); } } @@ -573,11 +574,11 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings< for (u32 index = 0; index < bindings.buffers.size(); ++index) { buffer_handles.push_back(bindings.buffers[index]->Handle()); } - scheduler.Record([bindings = std::move(bindings), - buffer_handles = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { - cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles.size()), - buffer_handles.data(), bindings.offsets.data(), - bindings.sizes.data()); + 
scheduler.Record([bindings_ = std::move(bindings), + buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) { + cmdbuf.BindTransformFeedbackBuffersEXT(0, static_cast<u32>(buffer_handles_.size()), + buffer_handles_.data(), bindings_.offsets.data(), + bindings_.sizes.data()); }); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d600c4e61..4f84d8497 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -469,9 +469,9 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading ComputePipelineCacheKey key; file.read(reinterpret_cast<char*>(&key), sizeof(key)); - workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { + workers.QueueWork([this, key, env_ = std::move(env), &state, &callback]() mutable { ShaderPools pools; - auto pipeline{CreateComputePipeline(pools, key, env, state.statistics.get(), false)}; + auto pipeline{CreateComputePipeline(pools, key, env_, state.statistics.get(), false)}; std::scoped_lock lock{state.mutex}; if (pipeline) { compute_cache.emplace(key, std::move(pipeline)); @@ -500,10 +500,10 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading (key.state.dynamic_vertex_input != 0) != dynamic_features.has_dynamic_vertex_input) { return; } - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { + workers.QueueWork([this, key, envs_ = std::move(envs), &state, &callback]() mutable { ShaderPools pools; boost::container::static_vector<Shader::Environment*, 5> env_ptrs; - for (auto& env : envs) { + for (auto& env : envs_) { env_ptrs.push_back(&env); } auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), @@ -702,8 +702,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( if (!pipeline || pipeline_cache_filename.empty()) { return pipeline; } - serialization_thread.QueueWork([this, key, env = std::move(env)] { - SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, + serialization_thread.QueueWork([this, key, env_ = std::move(env)] { + SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env_}, pipeline_cache_filename, CACHE_VERSION); }); return pipeline; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index d67490449..29e0b797b 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -98,10 +98,10 @@ HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> depend : HostCounterBase{std::move(dependency_)}, cache{cache_}, type{type_}, query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} { const vk::Device* logical = &cache.GetDevice().GetLogical(); - cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { + cache.GetScheduler().Record([logical, query_ = query](vk::CommandBuffer cmdbuf) { const bool use_precise = Settings::IsGPULevelHigh(); - logical->ResetQueryPool(query.first, query.second, 1); - cmdbuf.BeginQuery(query.first, query.second, + logical->ResetQueryPool(query_.first, query_.second, 1); + cmdbuf.BeginQuery(query_.first, query_.second, use_precise ? 
VK_QUERY_CONTROL_PRECISE_BIT : 0); }); } @@ -111,8 +111,9 @@ HostCounter::~HostCounter() { } void HostCounter::EndQuery() { - cache.GetScheduler().Record( - [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); + cache.GetScheduler().Record([query_ = query](vk::CommandBuffer cmdbuf) { + cmdbuf.EndQuery(query_.first, query_.second); + }); } u64 HostCounter::BlockingQuery(bool async) const { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 3aac3cfab..bf6ad6c79 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1412,7 +1412,7 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS } scheduler->RequestOutsideRenderPassOperationContext(); scheduler->Record([buffers = std::move(buffers_vector), image = *original_image, - aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { + aspect_mask_ = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { const VkImageMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -1424,7 +1424,7 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = image, .subresourceRange{ - .aspectMask = aspect_mask, + .aspectMask = aspect_mask_, .baseMipLevel = 0, .levelCount = VK_REMAINING_MIP_LEVELS, .baseArrayLayer = 0, @@ -1456,7 +1456,7 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .image = image, .subresourceRange{ - .aspectMask = aspect_mask, + .aspectMask = aspect_mask_, .baseMipLevel = 0, .levelCount = VK_REMAINING_MIP_LEVELS, .baseArrayLayer = 0, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3a859139c..4457b366f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -8,6 +8,7 @@ #include "common/alignment.h" #include "common/settings.h" +#include "core/memory.h" #include "video_core/control/channel_state.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" @@ -1026,19 +1027,19 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) runtime.AccelerateImageUpload(image, staging, uploads); return; } - const size_t guest_size_bytes = image.guest_size_bytes; - swizzle_data_buffer.resize_destructive(guest_size_bytes); - gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); + + Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( + *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); if (True(image.flags & ImageFlagBits::Converted)) { unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); - auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, - unswizzle_data_buffer); + auto copies = + UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer); ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); image.UploadMemory(staging, copies); } else { const auto copies = - UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span); + UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span); image.UploadMemory(staging, copies); } } @@ -1231,11 +1232,12 
@@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) { decode->image_id = image_id; async_decodes.push_back(std::move(decode)); - Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes); - const size_t guest_size_bytes = image.guest_size_bytes; - swizzle_data_buffer.resize_destructive(guest_size_bytes); - gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); - auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer, + static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; + local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); + Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( + *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); + + auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, local_unswizzle_data_buffer); const size_t out_size = MapSizeBytes(image); diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 0de6ed09d..a83f5d41c 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -20,6 +20,7 @@ #include "common/div_ceil.h" #include "common/scratch_buffer.h" #include "common/settings.h" +#include "core/memory.h" #include "video_core/compatible_formats.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -544,17 +545,15 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr tile_size.height, info.tile_width_spacing); const size_t subresource_size = sizes[level]; - tmp_buffer.resize_destructive(subresource_size); - const std::span<u8> dst(tmp_buffer); - for (s32 layer = 0; layer < info.resources.layers; ++layer) { const std::span<const u8> src = input.subspan(host_offset); - gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); - - SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, - num_tiles.depth, block.height, block.depth); + { + Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> + dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); - gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); + SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, + num_tiles.depth, block.height, block.depth); + } host_offset += host_bytes_per_layer; guest_offset += layer_stride; @@ -837,6 +836,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory const Extent3D size = info.size; if (info.type == ImageType::Linear) { + ASSERT(output.size_bytes() >= guest_size_bytes); gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); @@ -904,16 +904,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory return copies; } -BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, - const ImageBase& image, std::span<u8> output) { - gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); - return BufferCopy{ - .src_offset = 0, - .dst_offset = 0, - .size = image.guest_size_bytes, - }; -} - void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, std::span<BufferImageCopy> copies) { u32 output_offset = 0; diff --git a/src/video_core/texture_cache/util.h 
b/src/video_core/texture_cache/util.h index ab45a43c4..5a0649d24 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -66,9 +66,6 @@ struct OverlapResult { Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, std::span<const u8> input, std::span<u8> output); -[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, - const ImageBase& image, std::span<u8> output); - void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, std::span<BufferImageCopy> copies); diff --git a/src/video_core/vulkan_common/vma.cpp b/src/video_core/vulkan_common/vma.cpp new file mode 100644 index 000000000..1fe2cf52b --- /dev/null +++ b/src/video_core/vulkan_common/vma.cpp @@ -0,0 +1,8 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#define VMA_IMPLEMENTATION +#define VMA_STATIC_VULKAN_FUNCTIONS 0 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 + +#include <vk_mem_alloc.h>
\ No newline at end of file
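
Note (not part of the diff): the recurring change across buffer_cache, dma_pusher, engine_upload, maxwell_dma, the software blitter and the texture cache is the replacement of explicit resize_destructive() + ReadBlock()/WriteBlock() pairs with the Core::Memory::GpuGuestMemory and GpuGuestMemoryScoped wrappers from core/memory.h. That header is outside this diff, so the sketch below only models the assumed behaviour — read the guest range into a staging buffer on construction, expose it as a span, and write it back on destruction, optionally to a retargeted address via SetAddressAndSize() — using stand-in names (FakeMemoryManager, ScopedGuestSpan) rather than the real types.

// Minimal sketch of the read-on-construction / write-back-on-destruction idea
// behind GpuGuestMemoryScoped. The real wrappers also take GuestMemoryFlags and
// an optional Common::ScratchBuffer backing; everything here is a simplified
// stand-in for illustration only.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <span>
#include <vector>

using GPUVAddr = std::uint64_t;

// Stand-in for Tegra::MemoryManager: backs "guest" memory with a flat vector.
struct FakeMemoryManager {
    std::vector<std::uint8_t> backing = std::vector<std::uint8_t>(0x1000, 0);

    void ReadBlock(GPUVAddr addr, void* dest, std::size_t size) const {
        std::memcpy(dest, backing.data() + addr, size);
    }
    void WriteBlock(GPUVAddr addr, const void* src, std::size_t size) {
        std::memcpy(backing.data() + addr, src, size);
    }
};

// Reads the source range on construction; writes the (possibly retargeted)
// range back on destruction, mirroring SetAddressAndSize() in the hunks above.
template <typename T>
class ScopedGuestSpan {
public:
    ScopedGuestSpan(FakeMemoryManager& mm, GPUVAddr addr, std::size_t count)
        : memory{mm}, gpu_addr{addr}, elements{count}, buffer(count) {
        memory.ReadBlock(gpu_addr, buffer.data(), count * sizeof(T));
    }
    ~ScopedGuestSpan() {
        memory.WriteBlock(gpu_addr, buffer.data(), elements * sizeof(T));
    }

    // Retarget the write-back destination, as DMACopy and MaxwellDMA do above.
    void SetAddressAndSize(GPUVAddr addr, std::size_t count) {
        gpu_addr = addr;
        elements = count;
    }

    // Behaves like a span over the staged data.
    T* data() { return buffer.data(); }
    std::size_t size() const { return elements; }
    operator std::span<T>() { return {buffer.data(), elements}; }

private:
    FakeMemoryManager& memory;
    GPUVAddr gpu_addr;
    std::size_t elements;
    std::vector<T> buffer;
};

// Usage in the style of BufferCache<P>::DMACopy: read src, write back to dest.
int main() {
    FakeMemoryManager mm;
    mm.backing[0x100] = 0xAB;
    {
        ScopedGuestSpan<std::uint8_t> tmp(mm, 0x100, 16); // reads 16 bytes at 0x100
        tmp.SetAddressAndSize(0x200, 16);                 // destructor copies them to 0x200
    }
    return mm.backing[0x200] == 0xAB ? 0 : 1;
}

The RAII form removes the easy-to-miss trailing WriteBlockCached()/WriteBlock() calls at the end of each copy path, which is why several hunks above simply delete the final write.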

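Note (not part of the diff): the CMakeLists.txt hunk links video_core against GPUOpen::VulkanMemoryAllocator, and the new vulkan_common/vma.cpp compiles the VMA implementation with VMA_STATIC_VULKAN_FUNCTIONS=0 and VMA_DYNAMIC_VULKAN_FUNCTIONS=1, i.e. VMA resolves all Vulkan entry points at runtime from the two loader functions it is given. The sketch below shows how a consumer would typically create an allocator under that configuration; it is illustrative only, since the diff itself adds no allocator-creation code, and all parameter names are placeholders.

// Illustrative only: creating a VmaAllocator when the implementation is built
// with VMA_DYNAMIC_VULKAN_FUNCTIONS=1, as vma.cpp above configures it.
#include <vk_mem_alloc.h>

VmaAllocator CreateAllocator(VkInstance instance, VkPhysicalDevice physical_device,
                             VkDevice device, PFN_vkGetInstanceProcAddr get_instance_proc_addr,
                             PFN_vkGetDeviceProcAddr get_device_proc_addr) {
    // With dynamic functions enabled, VMA fetches every other entry point itself
    // through these two loader functions.
    VmaVulkanFunctions functions{};
    functions.vkGetInstanceProcAddr = get_instance_proc_addr;
    functions.vkGetDeviceProcAddr = get_device_proc_addr;

    VmaAllocatorCreateInfo allocator_info{};
    allocator_info.vulkanApiVersion = VK_API_VERSION_1_1;
    allocator_info.physicalDevice = physical_device;
    allocator_info.device = device;
    allocator_info.instance = instance;
    allocator_info.pVulkanFunctions = &functions;

    VmaAllocator allocator = VK_NULL_HANDLE;
    if (vmaCreateAllocator(&allocator_info, &allocator) != VK_SUCCESS) {
        return VK_NULL_HANDLE;
    }
    return allocator; // caller releases it with vmaDestroyAllocator()
}

Only vkGetInstanceProcAddr and vkGetDeviceProcAddr need to be supplied in this mode, which matches a dynamically loaded Vulkan dispatch and is why the static-function path is disabled in vma.cpp.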